%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "1.112", %%% date = "24 December 2025", %%% time = "08:16:13 MDT", %%% filename = "vldbj.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% URL = "https://www.math.utah.edu/~beebe", %%% checksum = "65080 50770 260324 2577582", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "BibTeX; bibliography; Very Large Data Bases %%% Journal; VLDB Journal", %%% license = "public domain", %%% supported = "yes", %%% docstring = "This is a COMPLETE bibliography of %%% publications in the VLDB journal: Very Large %%% Data Bases (CODEN VLDBFR, ISSN 1066-8888 %%% (print), 0949-877X (electronic)), originally %%% published by Springer-Verlag on behalf of the %%% VLDB Endowment, and now published by the ACM. %%% %%% Publication of the VLDB Journal began with %%% volume 1, number 1, in 1992, and the journal %%% is normally published quarterly, although %%% occasionally, issues are combined, or volumes %%% are split across year boundaries. 
%%% %%% There is an editorial World Wide Web site at %%% %%% http://SunSITE.Informatik.RWTH-Aachen.DE/dblp/db/journals/vldb/ %%% %%% and publisher Web sites at %%% %%% https://dl.acm.org/loi/vldb %%% https://portal.acm.org/toc.cfm?id=J869 %%% https://link.springer.com/journal/778 %%% https://link.springer.de/link/service/journals/00778/index.htm %%% %%% At version 1.112, the year coverage looked %%% like this: %%% %%% 1992 ( 7) 2004 ( 23) 2016 ( 40) %%% 1993 ( 19) 2005 ( 22) 2017 ( 39) %%% 1994 ( 22) 2006 ( 24) 2018 ( 38) %%% 1995 ( 24) 2007 ( 26) 2019 ( 41) %%% 1996 ( 18) 2008 ( 68) 2020 ( 63) %%% 1997 ( 22) 2009 ( 53) 2021 ( 46) %%% 1998 ( 22) 2010 ( 45) 2022 ( 59) %%% 1999 ( 13) 2011 ( 41) 2023 ( 59) %%% 2000 ( 29) 2012 ( 40) 2024 ( 81) %%% 2001 ( 25) 2013 ( 38) 2025 ( 72) %%% 2002 ( 23) 2014 ( 44) %%% 2003 ( 23) 2015 ( 37) %%% %%% Article: 1246 %%% %%% Total entries: 1246 %%% %%% This bibliography was prepared largely from %%% the Web pages at the editorial and publisher %%% sites. %%% %%% Spelling has been verified with the UNIX %%% spell and GNU ispell programs using the %%% exception dictionary stored in the companion %%% file with extension .sok. %%% %%% BibTeX citation tags are uniformly chosen %%% as name:year:abbrev, where name is the %%% family name of the first author or editor, %%% year is a 4-digit number, and abbrev is a %%% 3-letter condensation of important title %%% words. Citation tags were automatically %%% generated by software developed for the %%% BibNet Project. %%% %%% In this bibliography, entries are sorted in %%% publication order within each journal, %%% using bibsort -bypages. %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. 
%%%                        This is produced by Robert
%%%                        Solovay's checksum utility.",
%%%  }
%%% ====================================================================
@Preamble{
    "\ifx \undefined \Dbar \def \Dbar {\leavevmode\raise0.2ex\hbox{--}\kern-0.5emD} \fi"
  # "\ifx \undefined \dbar \def \dbar {\leavevmode\raise0.2ex\hbox{--}\kern-0.5emd} \fi"
  # "\ifx \undefined \ocirc \def \ocirc #1{{\accent'27#1}} \fi"
  # "\ifx \undefined \varvec \def \varvec #1{\hbox{\boldmath $#1$}} \fi"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    e-mail: \path|beebe@math.utah.edu|,
                    \path|beebe@acm.org|,
                    \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:
@String{j-VLDB-J = "VLDB Journal: Very Large Data Bases"}

%%% ====================================================================
%%% Bibliography entries, sorted in publication order:
%%%
%%% Every entry below is an Article in the journal named by the
%%% j-VLDB-J string and credits the maintainer through the ack-nhfb
%%% string; both abbreviations are defined above.
%%%
%%% NOTE(review): fields such as xxauthor and xxpages are not read by
%%% standard BibTeX styles; they appear to record variant metadata for
%%% the same article --- confirm against the sources before changing
%%% them.

%%% Volume 1, number 1 (July 1992)

@Article{Breitbart:1992:TMI,
  author =       "Yuri Breitbart and Abraham Silberschatz and Glenn R.
                 Thompson",
  title =        "Transaction Management Issues in a Failure-Prone
                 Multidatabase System Environment",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "1",
  pages =        "1--39",
  month =        jul,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Breitbart:Yuri.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Silberschatz:Abraham.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Thompson:Glenn_R=.html",
  abstract =     "This paper is concerned with the problem of
                 integrating a number of existing, off-the-shelf local
                 database systems into a multidatabase system that
                 maintains consistency in the face of concurrency and
                 failures. The major difficulties in designing such
                 systems stem from the requirements that local
                 transactions be allowed to execute outside the
                 multidatabase system control, and that the various
                 local database systems cannot participate in the
                 execution of a global commit protocol. A scheme based
                 on the assumption that the component local database
                 systems use the strict two-phase locking protocol is
                 developed. Two major problems are addressed: How to
                 ensure global transaction atomicity without the
                 provision of a commit protocol, and how to ensure
                 freedom from global deadlocks.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
                 https://link.springer.com/journal/778",
  keywords =     "algorithms; deadlock recovery; performance;
                 reliability; serializability; transaction log",
  xxauthor =     "Yuri Breitbart and Avi Silberschatz and Glenn R.
                 Thompson",
  xxpages =      "1--40",
}

@Article{Nodine:1992:CTH,
  author =       "Marian H. Nodine and Stanley B. Zdonik",
  title =        "Cooperative Transaction Hierarchies: Transaction
                 Support for Design Applications",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "1",
  pages =        "41--80",
  month =        jul,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Nodine:Marian_H=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zdonik:Stanley_B=.html",
  abstract =     "Traditional atomic and nested transactions are not
                 always well-suited to cooperative applications, such as
                 design applications. Cooperative applications place
                 requirements on the database that may conflict with the
                 serializability requirement. They require transactions
                 to be long, possibly nested, and able to interact with
                 each other in a structured way. We define a transaction
                 framework, called a {\em cooperative transaction
                 hierarchy}, that allows us to relax the requirement for
                 atomic, serializable transactions to better support
                 cooperative applications. In cooperative transaction
                 hierarchies, we allow the correctness specification for
                 groups of designers to be tailored to the needs of the
                 application. We use {\em patterns\/} and {\em
                 conflicts\/} to specify the constraints imposed on a
                 group's history for it to be correct. We also provide
                 some primitives to smooth the operation of the members.
                 We characterize deadlocks in a cooperative transaction
                 hierarchy, and provide mechanisms for deadlock
                 detection and resolution. We examine issues associated
                 with failure and recovery.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
                 https://link.springer.com/journal/778",
  keywords =     "cooperation; deadlock detection; design transactions;
                 non-serializability; transaction hierarchies;
                 transaction synchronization; version management",
}

@Article{Spaccapietra:1992:MIA,
  author =       "Stefano Spaccapietra and Christine Parent and Yann
                 Dupont",
  title =        "Model Independent Assertions for Integration of
                 Heterogeneous Schemas",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "1",
  pages =        "81--126",
  month =        jul,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Dupont:Yann.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Parent:Christine.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Spaccapietra:Stefano.html",
  abstract =     "Due to the proliferation of database applications, the
                 integration of existing databases into a distributed or
                 federated system is one of the major challenges in
                 responding to enterprises' information requirements.
                 Some proposed integration techniques aim at providing
                 database administrators (DBAs) with a view definition
                 language they can use to build the desired integrated
                 schema. These techniques leave to the DBA the
                 responsibility of appropriately restructuring schema
                 elements from existing local schemas and of solving
                 inter-schema conflicts. This paper investigates the
                 {\em assertion-based\/} approach, in which the DBA's
                 action is limited to pointing out corresponding
                 elements in the schemas and to defining the nature of
                 the correspondence in between. This methodology is
                 capable of: ensuring better integration by taking into
                 account additional semantic information (assertions
                 about links); automatically solving structural
                 conflicts; building the integrated schema without
                 requiring conforming of initial schemas; applying
                 integration rules to a variety of data models; and
                 performing view as well as database integration. This
                 paper presents the basic ideas underlying our approach
                 and focuses on resolution of structural conflicts.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
                 https://link.springer.com/journal/778",
  keywords =     "conceptual modeling; database design and integration;
                 distributed databases; federated databases;
                 heterogeneous databases; schema integration",
}

@Article{Hsiao:1992:FDSa,
  author =       "David K. Hsiao",
  title =        "Federated Databases and Systems: {Part I} --- a
                 Tutorial on Their Data Sharing",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "1",
  pages =        "127--179",
  month =        jul,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hsiao:David_K=.html",
  abstract =     "The issues and solutions for the interoperability of a
                 class of heterogeneous databases and their database
                 systems are expounded in two parts. Part I presents the
                 data-sharing issues in federated databases and systems.
                 Part II, which will appear in a future issue, explores
                 resource-consolidation issues. {\em
                 Interoperability\/} in this context refers to data
                 sharing among heterogeneous databases, and to resource
                 consolidation of computer hardware, system software,
                 and support personnel. {\em Resource consolidation\/}
                 requires the presence of a database system architecture
                 which supports the heterogeneous system software,
                 thereby eliminating the need for various computer
                 hardware and support personnel. The class of
                 heterogeneous databases and database systems expounded
                 herein is termed {\em federated}, meaning that they are
                 joined in order to meet certain organizational
                 requirements and because they require their respective
                 application specificities, integrity constraints, and
                 security requirements to be upheld. Federated databases
                 and systems are new. While there are no technological
                 solutions, there has been considerable research towards
                 their development. This tutorial is aimed at exposing
                 the need for such solutions. A taxonomy is introduced
                 in our review of existing research undertakings and
                 exploratory developments. With this taxonomy, we
                 contrast and compare various approaches to federating
                 databases and systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
                 https://link.springer.com/journal/778",
  keywords =     "attribute-based;
                 data-model-and-language-to-data-model-and-language
                 mappings; database conversion; hierarchical; network;
                 object-oriented; relational; schema transformation;
                 transaction translation",
  xxpages =      "127--180",
}

%%% Volume 1, number 2 (October 1992)

@Article{Breitbart:1992:OMT,
  author =       "Yuri Breitbart and Hector Garcia-Molina and Abraham
                 Silberschatz",
  title =        "Overview of Multidatabase Transaction Management",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "2",
  pages =        "181--240",
  month =        oct,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Breitbart:Yuri.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Garcia=Molina:Hector.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Silberschatz:Abraham.html",
  abstract =     "A multidatabase system (MDBS) is a facility that
                 allows users access to data located in multiple
                 autonomous database management systems (DBMSs). In such
                 a system, {\em global transactions\/} are executed
                 under the control of the MDBS. Independently, {\em
                 local transactions\/} are executed under the control of
                 the local DBMSs. Each local DBMS integrated by the MDBS
                 may employ a different transaction management scheme.
                 In addition, each local DBMS has complete control over
                 all transactions (global and local) executing at its
                 site, including the ability to abort at any point any
                 of the transactions executing at its site. Typically,
                 no design or internal DBMS structure changes are
                 allowed in order to accommodate the MDBS. Furthermore,
                 the local DBMSs may not be aware of each other and, as
                 a consequence, cannot coordinate their actions. Thus,
                 traditional techniques for ensuring transaction
                 atomicity and consistency in homogeneous distributed
                 database systems may not be appropriate for an MDBS
                 environment. The objective of this article is to
                 provide a brief review of the most current work in the
                 area of multidatabase transaction management. We first
                 define the problem and argue that the multidatabase
                 research will become increasingly important in the
                 coming years. We then outline basic research issues in
                 multidatabase transaction management and review recent
                 results in the area. We conclude with a discussion of
                 open problems and practical implications of this
                 research.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
                 https://link.springer.com/journal/778",
  keywords =     "multidatabase; recovery; reliability; serializability;
                 transaction; two-level serializability",
  xxauthor =     "Yuri Breitbart and Hector Garcia-Molina and Avi
                 Silberschatz",
}

@Article{Drew:1992:TII,
  author =       "Pamela Drew and Roger King and Dennis Heimbigner",
  title =        "A Toolkit for the Incremental Implementation of
                 Heterogeneous Database Management Systems",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "2",
  pages =        "241--284",
  month =        oct,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Drew:Pamela.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Heimbigner:Dennis.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/King:Roger.html",
  abstract =     "The integration of heterogeneous database environments
                 is a difficult and complex task. The A la carte
                 Framework addresses this complexity by providing a
                 reusable and extensible architecture in which a set of
                 heterogeneous database management systems can be
                 integrated. The goal is to support incremental
                 integration of existing database facilities into
                 heterogeneous, interoperative, distributed systems. The
                 Framework addresses the three main issues in
                 heterogeneous systems integration. First, it identifies
                 the problems in integrating heterogeneous systems.
                 Second, it identifies the key interfaces and parameters
                 required for autonomous systems to interoperate
                 correctly. Third, it demonstrates an approach to
                 integrating these interfaces in an extensible and
                 incremental way. The A la carte Framework provides a
                 set of reusable, integrating components which integrate
                 the major functional domains, such as transaction
                 management, that could or should be integrated in
                 heterogeneous systems. It also provides a mechanism for
                 capturing key characteristics of the components and
                 constraints which describe how the components can be
                 mixed and interchanged, thereby helping to reduce the
                 complexity of the integration process. Using this
                 framework, we have implemented an experimental,
                 heterogeneous configuration as part of the object
                 management work in the software engineering research
                 consortium, Arcadia.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
                 https://link.springer.com/journal/778",
  keywords =     "database toolkits; extensible databases; heterogeneous
                 databases; heterogeneous transaction management;
                 incremental integration; open architectures;
                 reconfigurable architectures",
}

@Article{Hsiao:1992:FDSb,
  author =       "David K. Hsiao",
  title =        "Federated Databases and Systems: {Part II} --- a
                 Tutorial on Their Resource Consolidation",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "2",
  pages =        "285--310",
  month =        oct,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hsiao:David_K=.html",
  abstract =     "The issues and solutions for the interoperability of a
                 class of heterogeneous databases and their database
                 systems are expounded in two parts. Part I presented
                 the data-sharing issues in federated databases and
                 systems (Hsiao, 1992). The present article explores
                 resource-consolidation issues. {\em
                 Interoperability\/} in this context refers to data
                 sharing among heterogeneous databases, and to resource
                 consolidation of computer hardware, system software,
                 and support personnel. {\em Resource consolidation\/}
                 requires the presence of a database system architecture
                 which supports the heterogeneous system software,
                 thereby eliminating the need for various computer
                 hardware and support personnel. The class of
                 heterogeneous databases and database systems expounded
                 herein is termed {\em federated}, meaning that they are
                 joined in order to meet certain organizational
                 requirements and because they require their respective
                 application specificities, integrity constraints, and
                 security requirements to be upheld. Federated databases
                 and systems are new. While there are no technological
                 solutions, there has been considerable research towards
                 their development. This tutorial is aimed at exposing
                 the need for such solutions. A taxonomy is introduced
                 in our review of existing research undertakings and
                 exploratory developments. With this taxonomy, we
                 contrast and compare various approaches to federating
                 databases and systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
                 https://link.springer.com/journal/778",
  keywords =     "attribute-based;
                 data-model-and-language-to-data-model-and-language
                 mappings; database conversion; hierarchical; network;
                 object-oriented; relational; schema transformation;
                 transaction translation",
}

%%% Volume 2, number 1 (January 1993)

@Article{Yu:1993:BMB,
  author =       "Philip S. Yu and Douglas W. Cornell",
  title =        "Buffer Management Based on Return on Consumption in a
                 Multi-Query Environment",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "1",
  pages =        "1--37",
  month =        jan,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:24 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Cornell:Douglas_W=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yu:Philip_S=.html",
  abstract =     "In a multi-query environment, the marginal utilities
                 of allocating additional buffer to the various queries
                 can be vastly different. The conventional approach
                 examines each query in isolation to determine the
                 optimal access plan and the corresponding locality set.
                 This can lead to performance that is far from optimal.
                 As each query can have different access plans with
                 dissimilar locality sets and sensitivities to memory
                 requirement, we employ the concepts of memory
                 consumption and return on consumption (ROC) as the
                 basis for memory allocations. Memory consumption of a
                 query is its space-time product, while ROC is a measure
                 of the effectiveness of response-time reduction through
                 additional memory consumption. A global optimization
                 strategy using simulated annealing is developed, which
                 minimizes the average response over all queries under
                 the constraint that the total memory consumption rate
                 has to be less than the buffer size. It selects the
                 optimal join method and memory allocation for all query
                 types simultaneously. By analyzing the way the optimal
                 strategy makes memory allocations, a heuristic
                 threshold strategy is then proposed. The threshold
                 strategy is based on the concept of ROC. As the memory
                 consumption rate by all queries is limited by the
                 buffer size, the strategy tries to allocate the memory
                 so as to make sure that a certain level of ROC is
                 achieved. A simulation model is developed to
                 demonstrate that the heuristic strategy yields
                 performance that is very close to the optimal strategy
                 and is far superior to the conventional allocation
                 strategy.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
                 https://link.springer.com/journal/778",
  keywords =     "buffer management; join methods; query optimization;
                 queueing model; simulated annealing; simulation",
  xxpages =      "1--38",
}

@Article{Harder:1993:CCI,
  author =       "Theo H{\"a}rder and Kurt Rothermel",
  title =        "Concurrency Control Issues in Nested Transactions",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "1",
  pages =        "39--74",
  month =        jan,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:24 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/H=auml=rder:Theo.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Rothermel:Kurt.html",
  abstract =     "The concept of nested transactions offers more
                 decomposable execution units and finer-grained control
                 over concurrency and recovery than `flat' transactions.
                 Furthermore, it supports the decomposition of a `unit
                 of work' into subtasks and their appropriate
                 distribution in a computer system as a prerequisite of
                 intratransaction parallelism. However, to exploit its
                 full potential, suitable granules of concurrency
                 control as well as access modes for shared data are
                 necessary. In this article, we investigate various
                 issues of concurrency control for nested transactions.
                 First, the mechanisms for cooperation and communication
                 within nested transactions should not impede parallel
                 execution of transactions among parent and children or
                 among siblings. Therefore, a model for nested
                 transactions is proposed allowing for effective
                 exploitation of intra-transaction parallelism. Starting
                 with a set of basic locking rules, we introduce the
                 concept of `downward inheritance of locks' to make data
                 manipulated by a parent available to its children. To
                 support supervised and restricted access, this concept
                 is refined to `controlled downward inheritance.' The
                 initial concurrency control scheme was based on S-X
                 locks for `flat,' non-overlapping data objects. In
                 order to adjust this scheme for practical applications,
                 a set of concurrency control rules is derived for
                 generalized lock modes described by a compatibility
                 matrix. Also, these rules are combined with a
                 hierarchical locking scheme to improve selective access
                 to data granules of varying sizes. After having tied
                 together both types of hierarchies (transaction and
                 object), it can be shown how `controlled downward
                 inheritance' for hierarchical objects is achieved in
                 nested transactions. Finally, problems of deadlock
                 detection and resolution in nested transactions are
                 considered.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
                 https://link.springer.com/journal/778",
  keywords =     "concurrency control; locking; nested transactions;
                 object hierarchies",
}

@Article{Jensen:1993:UDT,
  author =       "Christian S. Jensen and Leo Mark and Nick Roussopoulos
                 and Timos K. Sellis",
  title =        "Using Differential Techniques to Efficiently Support
                 Transaction Time",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "1",
  pages =        "75--116",
  month =        jan,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:24 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jensen:Christian_S=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mark:Leo.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Roussopoulos:Nick.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sellis:Timos_K=.html",
  abstract =     "We present an architecture for query processing in the
                 relational model extended with transaction time. The
                 architecture integrates standard query optimization and
                 computation techniques with new differential
                 computation techniques. Differential computation
                 computes a query incrementally or decrementally from
                 the cached and indexed results of previous
                 computations. The use of differential computation
                 techniques is essential in order to provide efficient
                 processing of queries that access very large temporal
                 relations. Alternative query plans are integrated into
                 a state transition network, where the state space
                 includes backlogs of base relations, cached results
                 from previous computations, a cache index, and
                 intermediate results; the transitions include standard
                 relational algebra operators, operators for
                 constructing differential files, operators for
                 differential computation, and combined operators. A
                 rule set is presented to prune away parts of state
                 transition networks that are not promising, and dynamic
                 programming techniques are used to identify the optimal
                 plans from the remaining state transition networks. An
                 extended logical access path serves as a `structuring'
                 index on the cached results and contains, in addition,
                 vital statistics for the query optimization process
                 (including statistics about base relations, backlogs,
                 and queries---previously computed and cached,
                 previously computed, or just previously estimated).",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
                 https://link.springer.com/journal/778",
  keywords =     "efficient query processing; incremental and
                 decremental computation; temporal databases;
                 transaction time",
}

%%% Volume 2, number 2 (April 1993)

@Article{Haritsa:1993:VBS,
  author =       "Jayant R. Haritsa and Michael J. Carey and Miron
                 Livny",
  title =        "Value-Based Scheduling in Real-Time Database Systems",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "2",
  pages =        "117--152",
  month =        apr,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:25 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Carey:Michael_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Haritsa:Jayant_R=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Livny:Miron.html",
  abstract =     "In a real-time database system, an application may
                 assign a {\em value\/} to a transaction to reflect the
                 return it expects to receive if the transaction commits
                 before its deadline. Most research on real-time
                 database systems has focused on systems where all
                 transactions are assigned the same value, the
                 performance goal being to minimize the number of missed
                 deadlines. When transactions are assigned different
                 values, the goal of the system shifts to maximizing the
                 sum of the values of those transactions that commit by
                 their deadlines. Minimizing the number of missed
                 deadlines becomes a secondary concern. In this article,
                 we address the problem of establishing a priority
                 ordering among transactions characterized by both
                 values and deadlines that results in maximizing the
                 realized value. Of particular interest is the tradeoff
                 established between these values and deadlines in
                 constructing the priority ordering. Using a detailed
                 simulation model, we evaluate the performance of
                 several priority mappings that make this tradeoff in
                 different, but fixed, ways. In addition, a `bucket'
                 priority mechanism that allows the relative importance
                 of values and deadlines to be controlled is introduced
                 and studied. The notion of associating a penalty with
                 transactions whose deadlines are not met is also
                 briefly considered.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
                 https://link.springer.com/journal/778",
  keywords =     "priority and concurrency algorithms; priority mapping;
                 resource and data contention; transaction values and
                 deadlines",
}

@Article{Grant:1993:QLR,
  author =       "John Grant and Witold Litwin and Nick Roussopoulos and
                 Timos K. Sellis",
  title =        "Query Languages for Relational Multidatabases",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "2",
  pages =        "153--171",
  month =        apr,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:25 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Grant:John.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Litwin:Witold.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Roussopoulos:Nick.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sellis:Timos_K=.html",
  abstract =     "With the existence of many autonomous databases widely
                 accessible through computer networks, users will
                 require the capability to jointly manipulate data in
                 different databases. A multidatabase system provides
                 such a capability through a multidatabase manipulation
                 language, such as MSQL. We propose a theoretical
                 foundation for such languages by presenting a
                 multirelational algebra and calculus based on the
                 relational algebra and calculus. The proposal is
                 illustrated by various queries on an example
                 multidatabase. It is shown that properties of the
                 multirelational algebra may be used for optimization
                 and that every multirelational algebra query can be
                 expressed as a multirelational calculus query. The
                 connection between the multirelational languages and
                 MSQL, the multidatabase version of SQL, is also
                 investigated.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
                 https://link.springer.com/journal/778",
  keywords =     "multidatabase; multirelational algebra;
                 multirelational calculus; query optimization",
  xxpages =      "153--172",
}

@Article{Neufeld:1993:GCT,
  author =       "Andrea Neufeld and Guido Moerkotte and Peter C.
                 Lockemann",
  title =        "Generating Consistent Test Data for a Variable Set of
                 General Consistency Constraints",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "2",
  pages =        "173--213",
  month =        apr,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:25 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lockemann:Peter_C=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Moerkotte:Guido.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Neufeld:Andrea.html",
  abstract =     "To address the problem of generating test data for a
                 set of general consistency constraints, we propose a
                 new two-step approach: First the interdependencies
                 between consistency constraints are explored and a
                 generator formula is derived on their basis. During its
                 creation, the user may exert control. In essence, the
                 generator formula contains information to restrict the
                 search for consistent test databases. In the second
                 step, the test database is generated. Here, two
                 different approaches are proposed. The first adapts an
                 already published approach to generating finite models
                 by enhancing it with requirements imposed by test data
                 generation.
The second, a new approach, operationalizes the generator formula by translating it into a sequence of operators, and then executes it to construct the test database. For this purpose, we introduce two powerful operators: the generation operator and the test-and-repair operator. This approach also allows for enhancing the generation operators with heuristics for generating facts in a goal-directed fashion. It avoids the generation of test data that may contradict the consistency constraints, and limits the search space for the test data. This article concludes with a careful evaluation and comparison of the performance of the two approaches and their variants by describing a number of benchmarks and their results.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "consistency; design; logic; test data; validation", xxpages = "173--214", xxtitle = "Generating consistent test data: restricting the search space by a generator formula", } @Article{Du:1993:SCU, author = "Weimin Du and Ahmed K. Elmagarmid and Won Kim and Omran A. 
Bukhres", title = "Supporting Consistent Updates in Replicated Multidatabase Systems", journal = j-VLDB-J, volume = "2", number = "2", pages = "215--241", month = apr, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:25 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Bukhres:Omran_A=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Du:Weimin.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/e/Elmagarmid:Ahmed_K=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kim:Won.html", abstract = "Replication is useful in multidatabase systems (MDBSs) because, as in traditional distributed database systems, it increases data availability in the presence of failures and decreases data retrieval costs by reading local or close copies of data. Concurrency control, however, is more difficult in replicated MDBSs than in ordinary distributed database systems. This is the case not only because local concurrency controllers may schedule global transactions inconsistently, but also because local transactions (at different sites) may access the same replicated data. In this article, we propose a decentralized concurrency control protocol for a replicated MDBS. 
The proposed strategy supports prompt and consistent updates of replicated data by both local and global applications without a central coordinator.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "concurrency control; multidatabases; replica control; replicated data management; resolvable conflicts; serializability", } @Article{Anonymous:1993:Ca, author = "Anonymous", title = "Column", journal = j-VLDB-J, volume = "2", number = "2", pages = "??--??", month = apr, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:25 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Anonymous:1993:Cb, author = "Anonymous", title = "Column", journal = j-VLDB-J, volume = "2", number = "2", pages = "??--??", month = apr, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:25 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tomasic:1993:SIP, author = "Anthony Tomasic and Hector Garcia-Molina", title = "Special Issue in Parallelism in Database Systems: Query Processing and Inverted Indices in Shared-Nothing Document Information Retrieval Systems", journal = j-VLDB-J, volume = "2", number = "3", pages = "243--275", month = jul, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X 
(electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 27 08:46:01 MDT 2000", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Garcia=Molina:Hector.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tomasic:Anthony.html", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tomasic:1993:QPI, author = "Anthony Tomasic and Hector Garcia-Molina", title = "Query processing and inverted indices in shared-nothing text document information retrieval systems", journal = j-VLDB-J, volume = "2", number = "3", pages = "243--276", month = jul, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:26 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The performance of distributed text document retrieval systems is strongly influenced by the organization of the inverted text. This article compares the performance impact on query processing of various physical organizations for inverted lists. We present a new probabilistic model of the database and queries. Simulation experiments determine those variables that most strongly influence response time and throughput.
This leads to a set of design trade-offs over a wide range of hardware configurations and new parallel query processing strategies.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "file organization; full text information retrieval; inverted file; inverted index; performance; query processing; shared-nothing; striping", } @Article{Ziane:1993:PQP, author = "Mikal Ziane and Mohamed Za{\"\i}t and Pascale Borla-Salamet", title = "Parallel Query Processing with Zigzag Trees", journal = j-VLDB-J, volume = "2", number = "3", pages = "277--301", month = jul, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:26 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Borla=Salamet:Pascale.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Za=iuml=t:Mohamed.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Ziane:Mikal.html", abstract = "In this article, we describe our approach to the compile-time optimization and parallelization of queries for execution in DBS3 or EDS. DBS3 is a shared-memory parallel database system, while the EDS system has a distributed-memory architecture. Because DBS3 implements a parallel dataflow execution model, this approach applies to both architectures. Using randomized search strategies enables the exploration of a search space large enough to include zigzag trees, which are intermediate between left-deep and right-deep trees. Zigzag trees are shown to provide better response time than right-deep trees in case of limited memory. 
Performance measurements obtained using the DBS3 prototype show the advantages of zigzag trees under various conditions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "cost function; fragmentation; pipeline; search space", xxpages = "277--302", } @Article{Hua:1993:CDS, author = "Kien A. Hua and Yu-lung Lo and Honesty C. Young", title = "Considering Data Skew Factor in Multi-Way Join Query Optimization for Parallel Execution", journal = j-VLDB-J, volume = "2", number = "3", pages = "303--330", month = jul, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:26 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hua:Kien_A=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lo:Yu=lung.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Young:Honesty_C=.html", abstract = "A consensus on parallel architecture for very large database management has emerged. This architecture is based on a shared-nothing hardware organization. The computation model is very sensitive to skew in tuple distribution, however. Recently, several parallel join algorithms with dynamic load balancing capabilities have been proposed to address this issue, but none of them consider multi-way join problems. In this article we propose a dynamic load balancing technique for multi-way joins, and investigate the effect of load balancing on query optimization. In particular, we present a join-ordering strategy that takes load-balancing issues into consideration. 
Our performance study indicates that the proposed query optimization technique can provide very impressive performance improvement over conventional approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "load balancing; multi-way join; parallel-database computer; query optimization", xxauthor = "Kien A. Hua and Yo Lung Lo and Honesty C. Young", } @Article{Zhang:1993:TGC, author = "Aidong Zhang and Ahmed K. Elmagarmid", title = "A Theory of Global Concurrency Control in Multidatabase Systems", journal = j-VLDB-J, volume = "2", number = "3", pages = "331--360", month = jul, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:26 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/e/Elmagarmid:Ahmed_K=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zhang:Aidong.html", abstract = "This article presents a theoretical basis for global concurrency control to maintain global serializability in multidatabase systems. Three correctness criteria are formulated that utilize the intrinsic characteristics of global transactions to determine the serialization order of global subtransactions at each local site. In particular, two new types of serializability, chain-conflicting serializability and sharing serializability, are proposed and hybrid serializability, which combines these two basic criteria, is discussed. These criteria offer the advantage of imposing no restrictions on local sites other than local serializability while retaining global serializability. 
The graph testing techniques of the three criteria are provided as guidance for global transaction scheduling. In addition, an optimal property of global transactions for determining the serialization order of global subtransactions at local sites is formulated. This property defines the upper limit on global serializability in multidatabase systems.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "chain-conflicting serializability; hybrid serializability; optimality; sharing serializability", } @Article{Anonymous:1993:SIP, author = "Anonymous", title = "Special issue in parallelism in database systems", journal = j-VLDB-J, volume = "2", number = "3", pages = "??--??", month = jul, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:26 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Srinivasan:1993:PBT, author = "V. Srinivasan and Michael J.
Carey", title = "Performance of {B$^+$} tree concurrency control algorithms", journal = j-VLDB-J, volume = "2", number = "4", pages = "361--406", month = oct, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:27 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Carey:Michael_J=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Srinivasan:V=.html", abstract = "A number of algorithms have been proposed to access B$^+$-trees concurrently, but they are not well understood. In this article, we study the performance of various B$^+$-tree concurrency control algorithms using a detailed simulation model of B$^+$-tree operations in a centralized DBMS. Our study covers a wide range of data contention situations and resource conditions. In addition, based on the performance of the set of B$^+$-tree concurrency control algorithms, which includes one new algorithm, we make projections regarding the performance of other algorithms in the literature. Our results indicate that algorithms with updaters that lock-couple using exclusive locks perform poorly as compared to those that permit more optimistic index descents. In particular, the B-link algorithms are seen to provide the most concurrency and the best overall performance. 
Finally, we demonstrate the need for a highly concurrent long-term lock holding strategy to obtain the full benefits of a highly concurrent algorithm for index operations.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "B+-tree structures; data contention; lock modes; performance; resource conditions; simulation models; workload parameters", xxtitle = "Performance of {B+} Tree Concurrency Algorithms", } @Article{Weikum:1993:MLT, author = "Gerhard Weikum and Christof Hasse", title = "Multi-Level Transaction Management for Complex Objects: Implementation, Performance, Parallelism", journal = j-VLDB-J, volume = "2", number = "4", pages = "407--453", month = oct, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:27 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hasse:Christof.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Weikum:Gerhard.html", abstract = "Multi-level transactions are a variant of open-nested transactions in which the subtransactions correspond to operations at different levels of a layered system architecture. They allow the exploitation of semantics of high-level operations to increase concurrency. As a consequence, undoing a transaction requires compensation of completed subtransactions. In addition, multi-level recovery methods must take into consideration that high-level operations are not necessarily atomic if multiple pages are updated in a single subtransaction. 
This article presents algorithms for multi-level transaction management that are implemented in the database kernel system (DASDBS). In particular, we show that multi-level recovery can be implemented in an efficient way. We discuss performance measurements using a synthetic benchmark for processing complex objects in a multi-user environment. We show that multi-level transaction management can be extended easily to cope with parallel subtransactions within a single transaction. Performance results are presented with varying degrees of inter- and intratransaction parallelism.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "atomicity; complex objects; inter- and intratransaction parallelism; multi-level transactions; performance; persistence; recovery", xxpages = "407--454", } @Article{Storey:1993:USR, author = "Veda C. Storey", title = "Understanding Semantic Relationships", journal = j-VLDB-J, volume = "2", number = "4", pages = "455--488", month = oct, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:27 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Storey:Veda_C=.html", abstract = "To develop sophisticated database management systems, there is a need to incorporate more understanding of the real world in the information that is stored in a database. Semantic data models have been developed to try to capture some of the meaning, as well as the structure, of data using abstractions such as inclusion, aggregation, and association. 
Besides these well-known relationships, a number of additional semantic relationships have been identified by researchers in other disciplines such as linguistics, logic, and cognitive psychology. This article explores some of the lesser-recognized semantic relationships and discusses both how they could be captured, either manually or by using an automated tool, and their impact on database design. To demonstrate the feasibility of this research, a prototype system for analyzing semantic relationships, called the Semantic Relationship Analyzer, is presented.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database design; database design systems; entity-relationship model; relational model; semantic relationships", } @Article{Tseng:1993:SMS, author = "Frank Shou-Cheng Tseng and Arbee L. P. Chen and W.-P. Yang", title = "Searching a Minimal Semantically-Equivalent Subset of a Set of Partial Values", journal = j-VLDB-J, volume = "2", number = "4", pages = "489--512", month = oct, year = "1993", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:27 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chen:Arbee_L=_P=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tseng:Frank_Shou=Cheng.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yang:W==P=.html", abstract = "Imprecise data exist in databases due to their unavailability or to data/schema incompatibilities in a multidatabase system. Partial values have been used to represent imprecise data. 
Manipulation of partial values is therefore necessary to process queries involving imprecise data. In this article, we study the problem of eliminating redundant partial values that result from a projection on an attribute with partial values. The redundancy of partial values is defined through the interpretation of a set of partial values. This problem is equivalent to searching a minimal semantically-equivalent subset of a set of partial values. A semantically-equivalent subset contains exactly the same information as the original set. We derive a set of useful properties and apply a graph matching technique to develop an efficient algorithm for searching such a minimal subset and therefore eliminating redundant partial values. By this process, we not only provide a concise answer to the user, but also reduce the communication cost when partial values are requested to be transmitted from one site to another site in a distributed environment. Moreover, further manipulation of the partial values can be simplified. This work is also extended to the case of multi-attribute projections.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "bipartite graph; graph matching; imprecise data; minimal elements; multidatabase systems; partial values", xxauthor = "Frank S. C. Tseng and Arbee L. P. 
Chen and Wei Pang Yang", } @Article{Georgakopoulos:1994:CST, author = "Dimitrios Georgakopoulos and Marek Rusinkiewicz and Witold Litwin", title = "Chronological Scheduling of Transactions with Temporal Dependencies", journal = j-VLDB-J, volume = "3", number = "1", pages = "1--28", month = jan, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:28 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Georgakopoulos:Dimitrios.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Litwin:Witold.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Rusinkiewicz:Marek.html", abstract = "Database applications often impose temporal dependencies between transactions that must be satisfied to preserve data consistency. The extant correctness criteria used to schedule the execution of concurrent transactions are either time independent or use strict, difficult to satisfy real-time constraints. On one end of the spectrum, serializability completely ignores time. On the other end, deadline scheduling approaches consider the outcome of each transaction execution correct only if the transaction meets its real-time deadline. In this article, we explore new correctness criteria and scheduling methods that capture temporal transaction dependencies and belong to the broad area between these two extreme approaches. We introduce the concepts of {\em succession dependency\/} and {\em chronological dependency\/} and define correctness criteria under which temporal dependencies between transactions are preserved even if the dependent transactions execute concurrently. 
We also propose a {\em chronological scheduler\/} that can guarantee that transaction executions satisfy their chronological constraints. The advantages of chronological scheduling over traditional scheduling methods, as well as the main issues in the implementation and performance of the proposed scheduler, are discussed.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "concurrent succession; execution correctness; partial rollbacks; synchronization; transaction ordering", } @Article{Whang:1994:DMD, author = "Kyu-Young Whang and Sang-Wook Kim and Gio Wiederhold", title = "Dynamic Maintenance of Data Distribution for Selectivity Estimation", journal = j-VLDB-J, volume = "3", number = "1", pages = "29--51", month = jan, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:28 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kim:Sang=Wook.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Whang:Kyu=Young.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Wiederhold:Gio.html", abstract = "We propose a new dynamic method for multidimensional selectivity estimation for range queries that works accurately independent of data distribution. Good estimation of selectivity is important for query optimization and physical database design. Our method employs the multilevel grid file (MLGF) for accurate estimation of multidimensional data distribution. The MLGF is a dynamic, hierarchical, balanced, multidimensional file structure that gracefully adapts to nonuniform and correlated distributions.
We show that the MLGF directory naturally represents a multidimensional data distribution. We then extend it for further refinement and present the selectivity estimation method based on the MLGF. Extensive experiments have been performed to test the accuracy of selectivity estimation. The results show that estimation errors are very small independent of distributions, even with correlated and/or highly skewed ones. Finally, we analyze the cause of errors in estimation and investigate the effects of various parameters on the accuracy of estimation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "multidimensional file structure; multilevel grid files; physical database design; query optimization", } @Article{Kamel:1994:PBO, author = "Nabil Kamel and Ping Wu and Stanley Y. W. Su", title = "A Pattern-Based Object Calculus", journal = j-VLDB-J, volume = "3", number = "1", pages = "53--76", month = jan, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:28 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kamel:Nabil.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Su:Stanley_Y=_W=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Wu:Ping.html", abstract = "Several object-oriented database management systems have been implemented without an accompanying theoretical foundation for constraint, query specification, and processing. The pattern-based object calculus presented in this article provides such a theoretical foundation for describing and processing object-oriented databases. 
We view an object-oriented database as a network of interrelated classes (i.e., the intension) and a collection of time-varying object association patterns (i.e., the extension). The object calculus is based on first-order logic. It provides the formalism for interpreting precisely and uniformly the semantics of queries and integrity constraints in object-oriented databases. The power of the object calculus is shown in four aspects. First, associations among objects are expressed explicitly in an object-oriented database. Second, the `nonassociation' operator is included in the object calculus. Third, set-oriented operations can be performed on both homogeneous and heterogeneous object association patterns. Fourth, our approach does not assume a specific form of database schema. A proposed formalism is also applied to the design of high-level object-oriented query and constraint languages.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "association patterns; Object-oriented databases; query expressions; semantic constraints", } @Article{Sciore:1994:VCM, author = "Edward Sciore", title = "Versioning and Configuration Management in an Object-Oriented Data Model", journal = j-VLDB-J, volume = "3", number = "1", pages = "77--106", month = jan, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:28 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sciore:Edward.html", abstract = "Many database applications require the storage and manipulation of different versions of data objects. 
To satisfy the diverse needs of these applications, current database systems support versioning at a very low level. This article demonstrates that application-independent versioning can be supported at a significantly higher level. In particular, we extend the EXTRA data model and EXCESS query language so that configurations can be specified conceptually and non-procedurally. We also show how version sets can be viewed multidimensionally, thereby allowing configurations to be expressed at a higher level of abstraction. The resulting model integrates and generalizes ideas in CAD systems, CASE systems, and temporal databases.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "EXTRA/EXCESS data models; generic and specific references; query language; semantically based configuration specifications", } @Article{Ramamohanarao:1994:IDD, author = "Kotagiri Ramamohanarao and James Harland", title = "An Introduction to Deductive Database Languages and Systems", journal = j-VLDB-J, volume = "3", number = "2", pages = "107--122", month = apr, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ramamohanarao:1994:SIP, author = "Kotagiri Ramamohanarao and James Harland", title = "Special Issue on Prototypes of Deductive Database Systems: An Introduction to Deductive Database Languages and Systems", journal = j-VLDB-J, volume = "3", number = "2", pages = "107--122", month = apr, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", 
ISSN-L = "1066-8888", bibdate = "Wed Sep 27 08:46:01 MDT 2000", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Harland:James.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramamohanarao:Kotagiri.html", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Derr:1994:GND, author = "Marcia A. Derr and Shinichi Morishita and Geoffrey Phipps", title = "The {Glue-Nail} Deductive Database System: Design, Implementation, and Evaluation", journal = j-VLDB-J, volume = "3", number = "2", pages = "123--160", month = apr, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:29 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Derr:Marcia_A=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Morishita:Shinichi.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Phipps:Geoffrey.html", abstract = "We describe the design and implementation of the Glue-Nail deductive database system. Nail is a purely declarative query language; Glue is a procedural language used for non-query activities. The two languages combined are sufficient to write a complete application. Nail and Glue code are both compiled into the target language IGlue. The Nail compiler uses variants of the magic sets algorithm and supports well-founded models. 
The Glue compiler's static optimizer uses peephole techniques and data flow analysis to improve code. The IGlue interpreter features a run-time adaptive optimizer that reoptimizes queries and automatically selects indexes. We also describe the Glue-Nail benchmark suite, a set of applications developed to evaluate the Glue-Nail language and to measure the performance of the system.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "language; performance; query optimization", } @Article{Ramakrishnan:1994:CDS, author = "Raghu Ramakrishnan and Divesh Srivastava and S. Sudarshan and Praveen Seshadri", title = "The {CORAL} Deductive System", journal = j-VLDB-J, volume = "3", number = "2", pages = "161--210", month = apr, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:29 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramakrishnan:Raghu.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Seshadri:Praveen.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Srivastava:Divesh.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sudarshan:S=.html", abstract = "CORAL is a deductive system that supports a rich declarative language, and an interface to C++, which allows for a combination of declarative and imperative programming. A CORAL declarative program can be organized as a collection of interacting modules. CORAL supports a wide range of evaluation strategies, and automatically chooses an efficient strategy for each module in the program. 
Users can guide query optimization by selecting from a wide range of control choices. The CORAL system provides imperative constructs to update, insert, and delete facts. Users can program in a combination of declarative CORAL and C++ extended with CORAL primitives. A high degree of extensibility is provided by allowing C++ programmers to use the class structure of C++ to enhance the CORAL implementation. CORAL provides support for main-memory data and, using the EXODUS storage manager, disk-resident data. We present a comprehensive view of the system from broad design goals, the language, and the architecture, to language interfaces and implementation details.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "deductive database; logic programming system; query language", } @Article{Kiessling:1994:DSE, author = "Werner Kie{\ss}ling and Helmut Schmidt and Werner Strau{\ss} and Gerhard D{\"u}nzinger", title = "{DECLARE} and {SDS}: Early Efforts to Commercialize Deductive Database Technology", journal = j-VLDB-J, volume = "3", number = "2", pages = "211--243", month = apr, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:29 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/D=uuml=nzinger:Gerhard.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kie=szlig=ling:Werner.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Schmidt:Helmut.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Strau=szlig=:Werner.html", abstract = "The Smart Data System (SDS) and its declarative query 
language, Declarative Reasoning, represent the first large-scale effort to commercialize deductive database technology. SDS offers the functionality of deductive reasoning in a distributed, heterogeneous database environment. In this article we discuss several interesting aspects of the query compilation and optimization process. The emphasis is on the query execution plan data structure and its transformations by the optimizing rule compiler. Through detailed case studies we demonstrate that efficient and very compact runtime code can be generated. We also discuss our experiences gained from a large pilot application (the MVV-expert) and report on several issues of practical interest in engineering such a complex system, including the migration from Lisp to C. We argue that heuristic knowledge and control should be made an integral part of deductive databases.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "declarative reasoning; distributed query processing; heuristic control; multi-databases; productization; query optimizer", } @Article{Vaghani:1994:ADD, author = "Jayen Vaghani and Kotagiri Ramamohanarao and David B. Kemp and Zoltan Somogyi and Peter J. Stuckey and Tim S. 
Leask and James Harland", title = "The {Aditi} Deductive Database System", journal = j-VLDB-J, volume = "3", number = "2", pages = "245--288", month = apr, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:29 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Harland:James.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kemp:David_B=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Leask:Tim_S=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramamohanarao:Kotagiri.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Somogyi:Zoltan.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Stuckey:Peter_J=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/v/Vaghani:Jayen.html", abstract = "Deductive databases generalize relational databases by providing support for recursive views and non-atomic data. Aditi is a deductive system based on the client-server model; it is inherently multi-user and capable of exploiting parallelism on shared-memory multiprocessors. The back-end uses relational technology for efficiency in the management of disk-based data and uses optimization algorithms especially developed for the bottom-up evaluation of logical queries involving recursion. The front-end interacts with the user in a logical language that has more expressive power than relational query languages. 
We present the structure of Aditi, discuss its components in some detail, and present performance figures.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "implementation; logic; multi-user; parallelism; relational database", } @Article{Anonymous:1994:SIP, author = "Anonymous", title = "Special Issue on Prototypes of Deductive Database Systems", journal = j-VLDB-J, volume = "3", number = "2", pages = "??--??", month = apr, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lee:1994:EIV, author = "Byung Suk Lee and Gio Wiederhold", title = "Efficiently Instantiating View-Objects From Remote Relational Databases", journal = j-VLDB-J, volume = "3", number = "3", pages = "289--323", month = jul, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:30 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lee:Byung_Suk.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Wiederhold:Gio.html", abstract = "View-objects are complex objects that are instantiated by delivering a query to a database and converting the query result into a nested structure. 
In relational databases, query results are conventionally retrieved as a single flat relation, which contains duplicate subtuples in its composite tuples. These duplicate subtuples increase the amount of data to be handled and thus degrade performance. In this article, we describe two new methods that retrieve a query result in structures other than a single flat relation. One method retrieves a set of relation fragments, and the other retrieves a single-nested relation. We first describe their algorithms and cost models, and then present the cost comparison results in a client-server architecture with a relational main memory database residing on a server.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "client server; complex object; nested relation; query optimization; relation fragments", } @Article{Barbara-Milla:1994:DPT, author = "Daniel Barbar{\'a}-Mill{\'a} and Hector Garcia-Molina", title = "The Demarcation Protocol: a Technique for Maintaining Constraints in Distributed Database Systems", journal = j-VLDB-J, volume = "3", number = "3", pages = "325--353", month = jul, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Traditional protocols for distributed database management have a high message overhead; restrain or lock access to resources during protocol execution; and may become impractical for some scenarios like real-time systems and very large distributed databases. In this article, we present the demarcation protocol; it overcomes these problems by using explicit consistency constraints as the correctness criteria. 
The method establishes safe limits as `lines drawn in the sand' for updates, and makes it possible to change these limits dynamically, enforcing the constraints at all times. We show how this technique can be applied to linear arithmetic, existential, key, and approximate copy constraints.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "consistency constraints; serializability; transaction limits", } @Article{Barbara:1994:DPT, author = "Daniel Barbar{\'a} and Hector Garcia-Molina", title = "The Demarcation Protocol: a Technique for Maintaining Constraints in Distributed Database Systems", journal = j-VLDB-J, volume = "3", number = "3", pages = "325--353", month = jul, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 27 08:46:01 MDT 2000", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Barbar=aacute=:Daniel.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Garcia=Molina:Hector.html", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bertino:1994:ICO, author = "Elisa Bertino", title = "Index Configuration in Object-Oriented Databases", journal = j-VLDB-J, volume = "3", number = "3", pages = "355--399", month = jul, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:30 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; 
https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Bertino:Elisa.html", abstract = "In relational databases, an attribute of a relation can have only a single primitive value, making it cumbersome to model complex objects. The object-oriented paradigm removes this difficulty by introducing the notion of nested objects, which allows the value of an object attribute to be another object or a set of other objects. This means that a class consists of a set of attributes, and the values of the attributes are objects that belong to other classes; that is, the definition of a class forms a hierarchy of classes. All attributes of the nested classes are nested attributes of the root of the hierarchy. A branch of such hierarchy is called a {\em path}. In this article, we address the problem of index configuration for a given path. We first summarize some basic concepts, and introduce the concept of index configuration for a path. Then we present cost formulas to evaluate the costs of the various configurations. 
Finally, we present the algorithm that determines the optimal configuration, and show its correctness.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "index selection; physical database design; query optimization", } @Article{Guting:1994:ISD, author = "Ralf Hartmut G{\"u}ting", title = "An Introduction to Spatial Database Systems", journal = j-VLDB-J, volume = "3", number = "4", pages = "357--399", month = oct, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:31 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We propose a definition of a spatial database system as a database system that offers spatial data types in its data model and query language, and supports spatial data types in its implementation, providing at least spatial indexing and spatial join methods. Spatial database systems offer the underlying database technology for geographic information systems and other applications. We survey data modeling, querying, data structures and algorithms, and system architecture for such systems. 
The emphasis is on describing known technology in a coherent manner, rather than listing open problems.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Guting:1994:SIS, author = "Ralf Hartmut G{\"u}ting", title = "Special Issue on Spatial Database Systems: An Introduction to Spatial Database Systems", journal = j-VLDB-J, volume = "3", number = "4", pages = "357--399", month = oct, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 27 08:46:01 MDT 2000", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/G=uuml=ting:Ralf_Hartmut.html", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Baumann:1994:MMD, author = "Peter Baumann", title = "Management of Multidimensional Discrete Data", journal = j-VLDB-J, volume = "3", number = "4", pages = "401--444", month = oct, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:31 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Baumann:Peter.html", abstract = "Spatial database management involves two main categories of data: vector and raster data. The former has received a lot of in-depth investigation; the latter still lacks a sound framework. 
Current DBMSs either regard raster data as pure byte sequences where the DBMS has no knowledge about the underlying semantics, or they do not complement array structures with storage mechanisms suitable for huge arrays, or they are designed as specialized systems with sophisticated imaging functionality, but no general database capabilities (e.g., a query language). Many types of array data will require database support in the future, notably 2-D images, audio data and general signal-time series (1-D), animations (3-D), static or time-variant voxel fields (3-D and 4-D), and the ISO/IEC PIKS (Programmer's Imaging Kernel System) BasicImage type (5-D). In this article, we propose a comprehensive support of {\em multidimensional discrete data\/} (MDD) in databases, including operations on arrays of arbitrary size over arbitrary data types. A set of requirements is developed, a small set of language constructs is proposed (based on a formal algebraic semantics), and a novel MDD architecture is outlined to provide the basis for efficient MDD query evaluation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "image database systems; multimedia database systems; spatial index; tiling", } @Article{Chu:1994:SMA, author = "Wesley W. Chu and Ion Tim Ieong and Ricky K. 
Taira", title = "A Semantic Modeling Approach for Image Retrieval by Content", journal = j-VLDB-J, volume = "3", number = "4", pages = "445--477", month = oct, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:31 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chu:Wesley_W=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/i/Ieong:Ion_Tim.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Taira:Ricky_K=.html", abstract = "We introduce a semantic data model to capture the hierarchical, spatial, temporal, and evolutionary semantics of images in pictorial databases. This model mimics the user's conceptual view of the image content, providing the framework and guidelines for preprocessing to extract image features. Based on the model constructs, a spatial evolutionary query language (SEQL), which provides direct image object manipulation capabilities, is presented. With semantic information captured in the model, spatial evolutionary queries are answered efficiently. Using an object-oriented platform, a prototype medical-image management system was implemented at UCLA to demonstrate the feasibility of the proposed approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "image; medical; multimedia databases; spatial query processing; temporal evolutionary query processing", } @Article{Papadias:1994:QRS, author = "Dimitris Papadias and Timos K. 
Sellis", title = "Qualitative Representation of Spatial Knowledge in Two-Dimensional Space", journal = j-VLDB-J, volume = "3", number = "4", pages = "479--516", month = oct, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:31 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Papadias:Dimitris.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sellis:Timos_K=.html", abstract = "Various relation-based systems, concerned with the qualitative representation and processing of spatial knowledge, have been developed in numerous application domains. In this article, we identify the common concepts underlying qualitative spatial knowledge representation, we compare the representational properties of the different systems, and we outline the computational tasks involved in relation-based spatial information processing. We also describe {\em symbolic spatial indexes}, relation-based structures that combine several ideas in spatial knowledge representation. A symbolic spatial index is an array that preserves only a set of spatial relations among distinct objects in an image, called the modeling space; the index array discards information, such as shape and size of objects, and irrelevant spatial relations. The construction of a symbolic spatial index from an input image can be thought of as a transformation that keeps only a set of representative points needed to define the relations of the modeling space. 
By keeping the relative arrangements of the representative points in symbolic spatial indexes and discarding all other points, we maintain enough information to answer queries regarding the spatial relations of the modeling space without the need to access the initial image or an object database. Symbolic spatial indexes can be used to solve problems involving route planning, composition of spatial relations, and update operations.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "qualitative spatial information processing; representation of direction and topological relations; spatial data models; spatial query languages", } @Article{Lin:1994:TTI, author = "King Ip Lin and H. V. Jagadish and Christos Faloutsos", title = "The {TV}-Tree: An Index Structure for High-Dimensional Data", journal = j-VLDB-J, volume = "3", number = "4", pages = "517--542", month = oct, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:31 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Faloutsos:Christos.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jagadish:H=_V=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lin:King=Ip.html", abstract = "We propose a file structure to index high-dimensionality data, which are typically points in some feature space. The idea is to use only a few of the features, using additional features only when the additional discriminatory power is absolutely necessary. 
We present in detail the design of our tree structure and the associated algorithms that handle such `varying length' feature vectors. Finally, we report simulation results, comparing the proposed structure with the $ R^* $-tree, which is one of the most successful methods for low-dimensionality spaces. The results illustrate the superiority of our method, which saves up to 80\% in disk accesses.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "query by content; similarity retrieval; spatial index", } @Article{Anonymous:1994:SIS, author = "Anonymous", title = "Special Issue on Spatial Database Systems", journal = j-VLDB-J, volume = "3", number = "4", pages = "??--??", month = oct, year = "1994", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:31 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Constantopoulos:1995:SIB, author = "Panos Constantopoulos and Matthias Jarke and John Mylopoulos and Yannis Vassiliou", title = "The Software Information Base: a Server for Reuse", journal = j-VLDB-J, volume = "4", number = "1", pages = "1--43", month = jan, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:32 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Constantopoulos:Panos.html; 
http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jarke:Matthias.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mylopoulos:John.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/v/Vassiliou:Yannis.html", abstract = "We present an experimental software repository system that provides organization, storage, management, and access facilities for reusable software components. The system, intended as part of an applications development environment, supports the representation of information about requirements, designs and implementations of software, and offers facilities for visual presentation of the software objects. This article details the features and architecture of the repository system, the technical challenges and the choices made for the system development along with a usage scenario that illustrates its functionality. The system has been developed and evaluated within the context of the ITHACA project, a technology integration/software engineering project sponsored by the European Communities through the ESPRIT program, aimed at developing an integrated reuse-centered application development and support environment based on object-oriented techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "conceptual modeling; information storage and retrieval; object-oriented databases; reuse; software engineering", } @Article{Clifton:1995:HDQ, author = "Chris Clifton and Hector Garcia-Molina and David Bloom", title = "{HyperFile}: a Data and Query Model for Documents", journal = j-VLDB-J, volume = "4", number = "1", pages = "45--86", month = jan, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:32 MDT 2008", bibsource = 
"http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Bloom:David.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Clifton:Chris.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Garcia=Molina:Hector.html", abstract = "Non-quantitative information such as documents and pictures pose interesting new problems in the database world. Traditional data models and query languages do not provide appropriate support for this information. Such data are typically stored in file systems, which do not provide the security, integrity, or query features of database management systems. The hypertext model has emerged as a good interface to this information; however, {\em finding\/} information using hypertext browsing does not scale well. We developed a query interface that serves as an extension of the browsing model of hypertext systems. These queries minimize the repeated user interactions required to locate data in a standard hypertext system. HyperFile is a prototype data server interface. 
In this article, we describe HyperFile, including a number of issues such as query generation, query processing, and indexing.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "hypertext; indexing; user interface", } @Article{Agrawal:1995:OSL, author = "Divyakant Agrawal and Amr {El Abbadi} and Richard Jeffers and Lijing Lin", title = "Ordered Shared Locks for Real-Time Databases", journal = j-VLDB-J, volume = "4", number = "1", pages = "87--126", month = jan, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:32 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Abbadi:Amr_El.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Agrawal:Divyakant.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jeffers:Richard.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lin:Lijing.html", abstract = "We propose locking protocols for real-time databases. Our approach has two main motivations: First, locking protocols are widely accepted and used in most database systems. Second, in real-time databases it has been shown that the blocking behavior of transactions in locking protocols results in performance degradation. We use a new relationship between locks called ordered sharing to eliminate blocking that arises in the traditional locking protocols. Ordered sharing eliminates blocking of read and write operations but may result in delayed termination. 
Since timeliness and not response time is the crucial factor in real-time databases, our protocols exploit this delay to allow transactions to execute within the slacks of delayed transactions. We compare the performance of the proposed protocols with the two-phase locking protocol for real-time databases. Our experiments indicate that the proposed protocols significantly reduce the percentage of missed deadlines in the system for a variety of workloads.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "concurrency control; time-critical scheduling; transaction management", } @Article{Dan:1995:CDA, author = "Asit Dan and Philip S. Yu and Jen Yao Chung", title = "Characterization of Database Access Pattern for Analytic Prediction of Buffer Hit Probability", journal = j-VLDB-J, volume = "4", number = "1", pages = "127--154", month = jan, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:32 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chung:Jen=Yao.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Dan:Asit.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yu:Philip_S=.html", abstract = "The analytic prediction of buffer hit probability, based on the characterization of database accesses from real reference traces, is extremely useful for workload management and system capacity planning. The knowledge can be helpful for proper allocation of buffer space to various database relations, as well as for the management of buffer space for a mixed transaction and query environment. 
Access characterization can also be used to predict the buffer invalidation effect in a multi-node environment which, in turn, can influence transaction routing strategies. However, it is a challenge to characterize the database access pattern of a real workload reference trace in a simple manner that can easily be used to compute buffer hit probability. In this article, we use a characterization method that distinguishes three types of access patterns from a trace: (1) locality within a transaction, (2) random accesses by transactions, and (3) sequential accesses by long queries. We then propose a concise way to characterize the access skew across randomly accessed pages by logically grouping the large number of data pages into a small number of partitions such that the frequency of accessing each page within a partition can be treated as equal. Based on this approach, we present a recursive binary partitioning algorithm that can infer the access skew characterization from the buffer hit probabilities for a subset of the buffer sizes. We validate the buffer hit predictions for single and multiple node systems using production database traces. 
We further show that the proposed approach can predict the buffer hit probability of a composite workload from those of its component files.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access skew; analytic prediction; database access characterization; reference trace; sequential access; workload management", } @Article{Peckham:1995:DME, author = "Joan Peckham and Bonnie MacKellar and Michael Doherty", title = "Data Model for Extensible Support of Explicit Relationships in Design Databases", journal = j-VLDB-J, volume = "4", number = "2", pages = "157--191", month = apr, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:33 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Doherty:Michael.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/MacKellar:Bonnie.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Peckham:Joan.html", abstract = "We describe the conceptual model of SORAC, a data modeling system developed at the University of Rhode Island. SORAC supports both semantic objects and relationships, and provides a tool for modeling databases needed for complex design domains. SORAC's set of built-in semantic relationships permits the schema designer to specify enforcement rules that maintain constraints on the object and relationship types. SORAC then automatically generates C++ code to maintain the specified enforcement rules, producing a schema that is compatible with Ontos. 
This facilitates the task of the schema designer, who no longer has to ensure that all methods on object classes correctly maintain necessary constraints. In addition, explicit specification of enforcement rules permits automated analysis of enforcement propagations. We compare the interpretations of relationships within the semantic and object-oriented models as an introduction to the mixed model that SORAC supports. Next, the set of built-in SORAC relationship types is presented in terms of the enforcement rules permitted on each relationship type. We then use the modeling requirements of an architectural design support system, called ArchObjects, to demonstrate the capabilities of SORAC. The implementation of the current SORAC prototype is also briefly discussed.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "computer-aided architectural design; database constraints; relationship semantics; semantic and object-oriented data modeling", xxpages = "157--192", } @Article{Teniente:1995:UKB, author = "Ernest Teniente and Antoni Oliv{\'e}", title = "Updating Knowledge Bases While Maintaining Their Consistency", journal = j-VLDB-J, volume = "4", number = "2", pages = "193--241", month = apr, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:33 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/Oliv=eacute=:Antoni.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Teniente:Ernest.html", abstract = "When updating a knowledge base, several problems may arise. 
One of the most important problems is that of integrity constraints satisfaction. The classic approach to this problem has been to develop methods for {\em checking\/} whether a given update violates an integrity constraint. An alternative approach consists of trying to repair integrity constraints violations by performing additional updates that {\em maintain\/} knowledge base consistency. Another major problem in knowledge base updating is that of {\em view updating}, which determines how an update request should be translated into an update of the underlying base facts. We propose a new method for updating knowledge bases while maintaining their consistency. Our method can be used for both integrity constraints maintenance and view updating. It can also be combined with any integrity checking method for view updating and integrity checking. The kind of updates handled by our method are: updates of base facts, view updates, updates of deductive rules, and updates of integrity constraints. Our method is based on events and transition rules, which explicitly define the insertions and deletions induced by a knowledge base update. 
Using these rules, an extension of the SLDNF procedure allows us to obtain all possible minimal ways of updating a knowledge base without violating any integrity constraint.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "integrity checking; integrity maintenance; view updating", } @Article{Guting:1995:RBS, author = "Ralf Hartmut G{\"u}ting and Markus Schneider", title = "Realm-Based Spatial Data Types: The {ROSE} Algebra", journal = j-VLDB-J, volume = "4", number = "2", pages = "243--286", month = apr, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:33 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/G=uuml=ting:Ralf_Hartmut.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Schneider:Markus.html", abstract = "Spatial data types or algebras for database systems should (1) be fully general, that is, closed under set operations, (2) have formally defined semantics, (3) be defined in terms of finite representations available in computers, (4) offer facilities to enforce geometric consistency of related spatial objects, and (5) be independent of a particular DBMS data model, but cooperate with any. We present an algebra that uses {\em realms\/} as geometric domains underlying spatial data types. A realm, as a general database concept, is a finite, dynamic, user-defined structure underlying one or more system data types. 
Problems of numerical robustness and topological correctness are solved within and below the realm layer so that spatial algebras defined above a realm have very nice algebraic properties. Realms also interact with a DBMS to enforce geometric consistency on object creation or update. The ROSE algebra is defined on top of realms and offers general types to represent point, line, and region features, together with a comprehensive set of operations. It is described within a polymorphic type system and interacts with a DBMS data model and query language through an abstract {\em object model interface.} An example integration of ROSE into the object-oriented data model $ O^2 $ and its query language is presented.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "finite resolution; geometric consistency; numerical robustness; object model interface; realm; topological correctness", } @Article{Templeton:1995:IDC, author = "Marjorie Templeton and Herbert Henley and Edward Maros and Darrel J.
{Van Buer}", title = "{InterViso}: Dealing With the Complexity of Federated Database Access", journal = j-VLDB-J, volume = "4", number = "2", pages = "287--317", month = apr, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:33 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Buer:Darrel_J=_Van.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Henley:Herbert.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Maros:Edward.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Templeton:Marjorie.html", abstract = "Connectivity products are finally available to provide the `highways' between computers containing data. IBM has provided strong validation of the concept with their `Information Warehouse.' DBMS vendors are providing gateways into their products, and SQL is being retrofitted on many older DBMSs to make it easier to access data from standard 4GL products and application development systems. The next step needed for data integration is to provide (1) a common data dictionary with a conceptual schema across the data to mask the many differences that occur when databases are developed independently and (2) a server that can access and integrate the databases using information from the data dictionary. In this article, we discuss InterViso, one of the first commercial federated database products. InterViso is based on Mermaid, which was developed at SDC and Unisys (Templeton et al., 1987b). 
It provides a value added layer above connectivity products to handle views across databases, schema translation, and transaction management.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data warehouse; database integration; federated database", xxpages = "287--318", } @Article{Atkinson:1995:SIP, author = "Malcolm P. Atkinson and Ronald Morrison", title = "Special Issue on Persistent Object Systems: Orthogonally Persistent Object Systems", journal = j-VLDB-J, volume = "4", number = "3", pages = "319--401", month = jul, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 27 08:46:01 MDT 2000", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Atkinson:Malcolm_P=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Morrison:Ronald.html", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Atkinson:1995:OPO, author = "Malcolm Atkinson and Ronald Morrison", title = "Orthogonally persistent object systems", journal = j-VLDB-J, volume = "4", number = "3", pages = "319--402", month = jul, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:34 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Persistent Application Systems (PASs) are of increasing social and economic importance. 
They have the potential to be long-lived, concurrently accessed, and consist of large bodies of data and programs. Typical examples of PASs are CAD/CAM systems, office automation, CASE tools, software engineering environments, and patient-care support systems in hospitals. Orthogonally persistent object systems are intended to provide improved support for the design, construction, maintenance, and operation of PASs. Persistence abstraction allows the creation and manipulation of data in a manner that is independent of its lifetime, thereby integrating the database view of information with the programming language view. This yields a number of advantages in terms of orthogonal design and programmer productivity which are beneficial for PASs. Design principles have been proposed for persistent systems. By following these principles, languages that provide persistence as a basic abstraction have been developed. In this paper, the motivation for orthogonal persistence is reviewed along with the above mentioned design principles. The concepts for integrating programming languages and databases through the persistence abstraction, and their benefits, are given. 
The technology to support persistence, the achievements, and future directions of persistence research are then discussed.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database programming languages; orthogonal persistence; persistent application systems; persistent programming languages", } @Article{Albano:1995:FPL, author = "Antonio Albano and Giorgio Ghelli and Renzo Orsini", title = "{Fibonacci}: a Programming Language for Object Databases", journal = j-VLDB-J, volume = "4", number = "3", pages = "403--444", month = jul, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:34 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Albano:Antonio.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Ghelli:Giorgio.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/Orsini:Renzo.html", abstract = "Fibonacci is an object-oriented database programming language characterized by static and strong typing, and by new mechanisms for modeling databases in terms of objects with roles, classes, and associations. A brief introduction to the language is provided to present those features, which are particularly suited to modeling complex databases. 
Examples of the use of Fibonacci are given with reference to the prototype implementation of the language.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data models; database programming languages; objects with roles", } @Article{Ozsu:1995:TUB, author = "M. Tamer {\"O}zsu and Randal J. Peters and Duane Szafron and Boman Irani and Anna Lipka and Adriana Mu{\~n}oz", title = "{TIGUKAT}: a Uniform Behavioral Objectbase Management System", journal = j-VLDB-J, volume = "4", number = "3", pages = "445--492", month = jul, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:34 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/=/=Ouml=zsu:M=_Tamer.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/i/Irani:Boman.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lipka:Anna.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mu=ntilde=oz:Adriana.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Peters:Randal_J=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Szafron:Duane.html", abstract = "We describe the TIGUKAT objectbase management system, which is under development at the Laboratory for Database Systems Research at the University of Alberta. TIGUKAT has a novel object model, whose identifying characteristics include a purely behavioral semantics and a uniform approach to objects. 
Everything in the system, including types, classes, collections, behaviors, and functions, as well as meta-information, is a first-class object with well-defined behavior. In this way, the model abstracts everything, including traditional structural notions such as instance variables, method implementation, and schema definition, into a uniform semantics of behaviors on objects. Our emphasis in this article is on the object model, its implementation, the persistence model, and the query language. We also (briefly) present other database management functions that are under development such as the query optimizer, the version control system, and the transaction manager.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database management; objectbase management; persistent storage system; reflective system", } @Article{Benzaken:1995:TDP, author = "V{\'e}ronique Benzaken and Anne Doucet", title = "{Th{\'e}mis}: a Database Programming Language Handling Integrity Constraints", journal = j-VLDB-J, volume = "4", number = "3", pages = "493--517", month = jul, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:34 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Benzaken:V=eacute=ronique.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Doucet:Anne.html", abstract = "This article presents a database programming language, Th{\'e}mis, which supports subtyping and class hierarchies, and allows for the definition of integrity constraints in a global and declarative way. 
We first describe the salient features of the language: types, names, classes, integrity constraints (including methods), and transactions. The inclusion of methods into integrity constraints allows an increase of the declarative power of these constraints. Indeed, the information needed to define a constraint is not always stored in the database through attributes, but is sometimes computed or derived data. Then, we address the problem of efficiently checking constraints. More specifically, we consider two different problems: (1) statically reducing the number of constraints to be checked, and (2) generating an efficient run-time checker. Using simple strategies, one can significantly improve the efficiency of the verification. We show how to reduce the number of constraints to be checked by characterizing the portions of the database that are involved in both the constraints and in a transaction. We also show how to generate efficient algorithms for checking a large class of constraints. We show how all the techniques presented exploit the underlying type system, which provides significant help in solving (1) and (2).
Last, the current status of the Th{\'e}mis prototype is presented.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database programming language; integrity constraints; program analysis", } @Article{Kemper:1995:APS, author = "Alfons Kemper and Donald Kossmann", title = "Adaptable Pointer Swizzling Strategies in Object Bases: Design, Realization, and Quantitative Analysis", journal = j-VLDB-J, volume = "4", number = "3", pages = "519--566", month = jul, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:34 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kemper:Alfons.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kossmann:Donald.html", abstract = "In this article, different techniques for {\em `pointer swizzling'\/} are classified and evaluated for optimizing the access to main-memory resident persistent objects. To speed up the access along inter-object references, the persistent pointers in the form of unique object identifiers (OIDs) are transformed (swizzled) into main-memory pointers (addresses). Pointer swizzling techniques can be divided into two classes: (1) those that allow replacement of swizzled objects from the buffer before the end of an application program, and (2) those that rule out the displacement of swizzled objects. The first class (i.e., techniques that take `precautions' for the replacement of swizzled objects) has not yet been thoroughly investigated. 
Four different pointer swizzling techniques allowing object replacement are investigated and compared with the performance of an object manager employing no pointer swizzling. The extensive qualitative and quantitative evaluation---only part of which could be presented in this article---demonstrate that there is no {\em one\/} superior pointer swizzling strategy for {\em all\/} application profiles. Therefore, an adaptable object base run-time system is devised that employs the full range of pointer swizzling strategies, depending on the application profile characteristics that are determined by, for example, monitoring in combination with sampling, user specifications, and/or program analysis.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "object-oriented database systems; performance evaluation; pointer swizzling", xxpages = "519--567", } @Article{Anonymous:1995:SIP, author = "Anonymous", title = "Special issue on persistent object systems", journal = j-VLDB-J, volume = "4", number = "3", pages = "??--??", month = jul, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:34 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Barbara:1995:SSO, author = "Daniel Barbar{\'a} and Tomasz Imielinski", title = "Special System-oriented Section: The Best of {SIGMOD} 1994: Sleepers and Workaholics: Caching Strategies in Mobile Environments", journal = j-VLDB-J, volume = "4", number = "4", pages = "567--602", month = oct, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", 
ISSN-L = "1066-8888", bibdate = "Wed Sep 27 08:46:01 MDT 2000", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Barbar=aacute=:Daniel.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/i/Imielinski:Tomasz.html", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Barbara:1995:SWC, author = "Daniel Barbar{\'a} and Tomasz Imieli{\'n}ski", title = "Sleepers and workaholics: caching strategies in mobile environments (extended version)", journal = j-VLDB-J, volume = "4", number = "4", pages = "567--602", month = oct, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:35 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In the mobile wireless computing environment of the future, a large number of users, equipped with low-powered palmtop machines, will query databases over wireless communication channels. Palmtop-based units will often be disconnected for prolonged periods of time, due to battery power saving measures; palmtops also will frequently relocate between different cells, and will connect to different data servers at different times. Caching of frequently accessed data items will be an important technique that will reduce contention on the narrow-bandwidth, wireless channel. However, cache invalidation strategies will be severely affected by the disconnection and mobility of the clients. The server may no longer know which clients are currently residing under its cell, and which of them are currently on.
We propose a taxonomy of different cache invalidation strategies, and study the impact of clients' disconnection times on their performance. We study ways to improve further the efficiency of the invalidation techniques described. We also describe how our techniques can be implemented over different network environments.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "caching; data management; information services; wireless", } @Article{Nyberg:1995:ACS, author = "Chris Nyberg and Tom Barclay and Zarka Cvetanovic and Jim Gray and David B. Lomet", title = "{AlphaSort}: a Cache-Sensitive Parallel External Sort", journal = j-VLDB-J, volume = "4", number = "4", pages = "603--627", month = oct, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:35 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Barclay:Tom.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Cvetanovic:Zarka.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Gray:Jim.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lomet:David_B=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Nyberg:Chris.html", abstract = "A new sort algorithm, called AlphaSort, demonstrates that commodity processors and disks can handle commercial batch workloads. Using commodity processors, memory, and arrays of SCSI disks, AlphaSort runs the industry-standard sort benchmark in seven seconds. This beats the best published record on a 32-CPU 32-disk Hypercube by 8:1. 
On another benchmark, AlphaSort sorted more than a gigabyte in one minute. AlphaSort is a cache-sensitive, memory-intensive sort algorithm. We argue that modern architectures require algorithm designers to re-examine their use of the memory hierarchy. AlphaSort uses clustered data structures to get good cache locality, file striping to get high disk bandwidth, QuickSort to generate runs, and replacement-selection to merge the runs. It uses shared memory multiprocessors to break the sort into subsort chores. Because startup times are becoming a significant part of the total time, we propose two new benchmarks: (1) MinuteSort: how much can you sort in one minute, and (2) PennySort: how much can you sort for one penny.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Alpha; cache; DEC 7000; disk; memory; parallel; sort; striping", xxpages = "603--628", } @Article{White:1995:QHP, author = "Seth J. White and David J. DeWitt", title = "{QuickStore}: a High Performance Mapped Object Store", journal = j-VLDB-J, volume = "4", number = "4", pages = "629--673", month = oct, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:35 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/DeWitt:David_J=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/White:Seth_J=.html", abstract = "QuickStore is a memory-mapped storage system for persistent C++, built on top of the EXODUS Storage Manager. 
QuickStore provides fast access to in-memory objects by allowing application programs to access objects via normal virtual memory pointers. This article presents the results of a detailed performance study using the OO7 benchmark. The study compares the performance of QuickStore with the latest implementation of the E programming language. The QuickStore and E systems exemplify the two basic approaches (hardware and software) that have been used to implement persistence in object-oriented database systems. In addition, both systems use the same underlying storage manager and compiler, allowing us to make a truly apples-to-apples comparison of the hardware and software techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "benchmark; client-server; memory-mapped; object-oriented; performance; pointer swizzling", } @Article{Swami:1995:EPF, author = "Arun N. Swami and K. Bernhard Schiefer", title = "Estimating Page Fetches for Index Scans with Finite {LRU} Buffers", journal = j-VLDB-J, volume = "4", number = "4", pages = "675--701", month = oct, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:35 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Schiefer:K=_Bernhard.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Swami:Arun_N=.html", abstract = "We describe an algorithm for estimating the number of page fetches for a partial or complete scan of a B-tree index. 
The algorithm obtains estimates for the number of page fetches for an index scan when given the number of tuples selected and the number of LRU buffers currently available. The algorithm has an initial phase that is performed exactly once before any estimates are calculated. This initial phase, involving LRU buffer modeling, requires a scan of all the index entries and calculates the number of page fetches for different buffer sizes. An approximate empirical model is obtained from this data. Subsequently, an inexpensive estimation procedure is called by the query optimizer whenever it needs an estimate of the page fetches for the index scan. This procedure utilizes the empirical model obtained in the initial phase.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "estimation; index scan; LRU; query optimization", } @Article{Landau:1995:HQA, author = "Gad M. Landau and Jeanette P. Schmidt and Vassilis J. Tsotras", title = "Historical queries along multiple lines of time evolution", journal = j-VLDB-J, volume = "4", number = "4", pages = "703--726", month = oct, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:35 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Traditional approaches to addressing historical queries assume a {\em single\/} line of time evolution; that is, a system (database, relation) evolves over time through a sequence of transactions. Each transaction always applies to the unique, current state of the system, resulting in a new current state. There are, however, complex applications where the system's state evolves into {\em multiple\/} lines of evolution. 
In general, this creates a tree (hierarchy) of evolution lines, where each tree node represents the time evolution of a particular subsystem. Multiple lines create novel historical queries, such as {\em vertical\/} or {\em horizontal\/} historical queries. The key characteristic of these problems is that portions of the history are shared; answering historical queries should not necessitate duplication of shared histories as this could increase the storage requirements dramatically. Both the vertical and horizontal historical queries have two parts: a `search' part, where the time of interest is located together with the appropriate subsystem, and a reconstruction part, where the subsystem's state is reconstructed for that time. This article focuses on the search part; several reconstruction methods, designed for single evolution lines can be applied once the appropriate time of interest is located. For both the vertical and the horizontal historical queries, we present algorithms that work without duplicating shared histories. Combinations of the vertical and horizontal queries are possible, and enable searching in both dimensions of the tree of evolutions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access methods; CAD databases; data-structures; rollback databases", } @Article{Landau:1995:RJA, author = "Gad M. Landau and Jeanette P. Schmidt and Vassilis J. 
Tsotras", title = "Regular Journal Articles: Historical Queries Along Multiple Lines of Time Evolution", journal = j-VLDB-J, volume = "4", number = "4", pages = "703--726", month = oct, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 27 08:46:01 MDT 2000", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Landau:Gad_M=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Schmidt:Jeanette_P=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tsotras:Vassilis_J=.html", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Abiteboul:1995:PLM, author = "Serge Abiteboul and Catriel Beeri", title = "The Power of Languages for the Manipulation of Complex Values", journal = j-VLDB-J, volume = "4", number = "4", pages = "727--794", month = oct, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:35 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Abiteboul:Serge.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Beeri:Catriel.html", abstract = "Various models and languages for describing and manipulating hierarchically structured data have been proposed. Algebraic, calculus-based, and logic-programming oriented languages have all been considered. 
This article presents a general model for complex values (i.e., values with hierarchical structures), and languages for it based on the three paradigms. The algebraic language generalizes those presented in the literature; it is shown to be related to the functional style of programming advocated by Backus (1978). The notion of domain independence (from relational databases) is defined, and syntactic restrictions (referred to as safety conditions) on calculus queries are formulated to guarantee domain independence. The main results are: The domain-independent calculus, the safe calculus, the algebra, and the logic-programming oriented language have equivalent expressive power. In particular, recursive queries, such as the transitive closure, can be expressed in each of the languages. For this result, the algebra needs the powerset operation. A more restricted version of safety is presented, such that the restricted safe calculus is equivalent to the algebra without the powerset. The results are extended to the case where arbitrary functions and predicates are used in the languages.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "complex object; complex value; database; database model; query language", } @Article{Anonymous:1995:SSO, author = "Anonymous", title = "Special system-oriented section: the best of {SIGMOD} '94", journal = j-VLDB-J, volume = "4", number = "4", pages = "??--??", month = oct, year = "1995", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:35 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", }
@Article{DeWitt:1996:POT, author = "David J. {De Witt} and Jeffrey F. Naughton and John C. Shafer and Shivakumar Venkataraman", title = "Parallelizing {OODBMS} traversals: a performance evaluation", journal = j-VLDB-J, volume = "5", number = "1", pages = "3--18", month = jan, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:36 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/DeWitt:David_J=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Naughton:Jeffrey_F=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shafer:John_C=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/v/Venkataraman:Shivakumar.html; http://link.springer.de/link/service/journals/00778/bibs/6005001/60050003.htm; http://link.springer.de/link/service/journals/00778/papers/6005001/60050003.pdf; http://link.springer.de/link/service/journals/00778/papers/6005001/60050003.ps.gz", abstract = "In this paper we describe the design and implementation of {\em ParSets}, a means of exploiting parallelism in the SHORE OODBMS. We used ParSets to parallelize the graph traversal portion of the OO7 OODBMS benchmark, and present speedup and scaleup results from parallel SHORE running these traversals on a cluster of commodity workstations connected by a standard Ethernet. For some OO7 traversals, SHORE achieved excellent speedup and scaleup; for other OO7 traversals, only marginal speedup and scaleup occurred. 
The characteristics of these traversals shed light on when the ParSet approach to parallelism can and cannot be applied to speed up an application.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Object-oriented database management systems; Parallelism; ParSets; SHORE", } @Article{Sivasankaran:1996:PAR, author = "Rajendran M. Sivasankaran and John A. Stankovic and Donald F. Towsley and Bhaskar Purimetla and Krithi Ramamritham", title = "Priority Assignment in Real-Time Active Databases", journal = j-VLDB-J, volume = "5", number = "1", pages = "19--34", month = jan, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:36 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Purimetla:Bhaskar.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramamritham:Krithi.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sivasankaran:Rajendran_M=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Stankovic:John_A=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Towsley:Donald_F=.html; http://link.springer.de/link/service/journals/00778/bibs/6005001/60050019.htm; http://link.springer.de/link/service/journals/00778/papers/6005001/60050019.pdf; http://link.springer.de/link/service/journals/00778/papers/6005001/60050019.ps.gz", abstract = "Active databases and real-time databases have been important areas of research in the recent past. 
It has been recognized that many benefits can be gained by integrating real-time and active database technologies. However, not much work has been done in the area of transaction processing in real-time active databases. This paper deals with an important aspect of transaction processing in real-time active databases, namely the problem of assigning priorities to transactions. In these systems, time-constrained transactions trigger other transactions during their execution. We present three policies for assigning priorities to parent, immediate and deferred transactions executing on a multiprocessor system and then evaluate the policies through simulation. The policies use different amounts of semantic information about transactions to assign the priorities. The simulator has been validated against the results of earlier published studies. We conducted experiments in three settings: a task setting, a main memory database setting and a disk-resident database setting. Our results demonstrate that dynamically changing the priorities of transactions, depending on their behavior (triggering rules), yields a substantial improvement in the number of triggering transactions that meet their deadline in all three settings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Active databases; Coupling mode; Deadlines; ECA-priority assignment; Real-time databases", } @Article{Keller:1996:PBC, author = "Arthur M. 
Keller and Julie Basu", title = "A Predicate-based Caching Scheme for Client-Server Database Architectures", journal = j-VLDB-J, volume = "5", number = "1", pages = "35--47", month = jan, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:36 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Basu:Julie.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Keller:Arthur_M=.html; http://link.springer.de/link/service/journals/00778/bibs/6005001/60050035.htm; http://link.springer.de/link/service/journals/00778/papers/6005001/60050035.pdf; http://link.springer.de/link/service/journals/00778/papers/6005001/60050035.ps.gz", abstract = "We propose a new client-side data-caching scheme for relational databases with a central server and multiple clients. Data are loaded into each client cache based on queries executed on the central database at the server. These queries are used to form predicates that describe the cache contents. A subsequent query at the client may be satisfied in its local cache if we can determine that the query result is entirely contained in the cache. This issue is called {\em cache completeness}. A separate issue, {\em cache currency}, deals with the effect on client caches of updates committed at the central database. We examine the various performance tradeoffs and optimization issues involved in addressing the questions of cache currency and completeness using predicate descriptions and suggest solutions that promote good dynamic behavior. 
Lower query-response times, reduced message traffic, higher server throughput, and better scalability are some of the expected benefits of our approach over commonly used relational server-side and object ID-based or page-based client-side caching.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "cache completeness; cache currency; caching; multiple clients; relational databases", } @Article{Stonebraker:1996:MWA, author = "Michael Stonebraker and Paul M. Aoki and Witold Litwin and Avi Pfeffer and Adam Sah and Jeff Sidell and Carl Staelin and Andrew Yu", title = "{Mariposa}: a Wide-Area Distributed Database System", journal = j-VLDB-J, volume = "5", number = "1", pages = "48--63", month = jan, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:36 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Aoki:Paul_M=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Litwin:Witold.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Pfeffer:Avi.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sah:Adam.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sidell:Jeff.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Staelin:Carl.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Stonebraker:Michael.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yu:Andrew.html; http://link.springer.de/link/service/journals/00778/bibs/6005001/60050048.htm; 
http://link.springer.de/link/service/journals/00778/papers/6005001/60050048.pdf; http://link.springer.de/link/service/journals/00778/papers/6005001/60050048.ps.gz", abstract = "The requirements of wide-area distributed database systems differ dramatically from those of local-area network systems. In a wide-area network (WAN) configuration, individual sites usually report to different system administrators, have different access and charging algorithms, install site-specific data type extensions, and have different constraints on servicing remote requests. Typical of the last point are production transaction environments, which are fully engaged during normal business hours, and cannot take on additional load. Finally, there may be many sites participating in a WAN distributed DBMS. In this world, a single program performing global query optimization using a cost-based optimizer will not work well. Cost-based optimization does not respond well to site-specific type extension, access constraints, charging algorithms, and time-of-day constraints. Furthermore, traditional cost-based distributed optimizers do not scale well to a large number of possible processing sites. Since traditional distributed DBMSs have all used cost-based optimizers, they are not appropriate in a WAN environment, and a new architecture is required. We have proposed and implemented an economic paradigm as the solution to these issues in a new distributed DBMS called Mariposa. In this paper, we present the architecture and implementation of Mariposa and discuss early feedback on its operating characteristics.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "autonomy; databases; distributed systems; economic site; name service; wide-area network", } @Article{Harris:1996:JAC, author = "Evan P.
Harris and Kotagiri Ramamohanarao", title = "Join Algorithm Costs Revisited", journal = j-VLDB-J, volume = "5", number = "1", pages = "64--84", month = jan, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:36 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Harris:Evan_P=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramamohanarao:Kotagiri.html; http://link.springer.de/link/service/journals/00778/bibs/6005001/60050064.htm; http://link.springer.de/link/service/journals/00778/papers/6005001/60050064.pdf; http://link.springer.de/link/service/journals/00778/papers/6005001/60050064.ps.gz", abstract = "A method of analysing join algorithms based upon the time required to access, transfer and perform the relevant CPU-based operations on a disk page is proposed. The costs of variations of several of the standard join algorithms, including nested block, sort-merge, GRACE hash and hybrid hash, are presented. For a given total buffer size, the cost of these join algorithms depends on the parts of the buffer allocated for each purpose. For example, when joining two relations using the nested block join algorithm, the amount of buffer space allocated for the outer and inner relations can significantly affect the cost of the join. Analysis of expected and experimental results of various join algorithms show that a combination of the optimal nested block and optimal GRACE hash join algorithms usually provide the greatest cost benefit, unless the relation size is a small multiple of the memory size. 
Algorithms to quickly determine a buffer allocation producing the minimal cost for each of these algorithms are presented. When the relation size is a small multiple of the amount of main memory available (typically up to three to six times), the hybrid hash join algorithm is preferable.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "join algorithms; minimisation; optimal buffer allocation", } @Article{Ramamritham:1996:TCC, author = "Krithi Ramamritham and Panos K. Chrysanthis", title = "A taxonomy of correctness criteria in database applications (*)", journal = j-VLDB-J, volume = "5", number = "1", pages = "85--97", month = jan, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:36 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chrysanthis:Panos_K=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramamritham:Krithi.html; http://link.springer.de/link/service/journals/00778/bibs/6005001/60050085.htm; http://link.springer.de/link/service/journals/00778/papers/6005001/60050085.pdf; http://link.springer.de/link/service/journals/00778/papers/6005001/60050085.ps.gz", abstract = "Whereas serializability captures {\em database consistency requirements\/} and {\em transaction correctness properties\/} via a single notion, recent research has attempted to come up with correctness criteria that view these two types of requirements independently. 
The search for more flexible correctness criteria is partly motivated by the introduction of new transaction models that extend the traditional atomic transaction model. These extensions came about because the atomic transaction model in conjunction with serializability is found to be very constraining when used in advanced applications (e.g., design databases) that function in distributed, cooperative, and heterogeneous environments. In this article we develop a taxonomy of various {\em correctness criteria\/} that focus on database consistency requirements and transaction correctness properties from the viewpoint of {\em what\/} the different dimensions of these two are. This taxonomy allows us to categorize correctness criteria that have been proposed in the literature. To help in this categorization, we have applied a uniform specification technique, based on ACTA, to express the various criteria. Such a categorization helps shed light on the similarities and differences between different criteria and places them in perspective.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "concurrency control; database correctness criteria; formal specifications; transaction processing", } @Article{Tsatalos:1996:GVT, author = "Odysseas G. Tsatalos and Marvin H. Solomon and Yannis E. 
Ioannidis", title = "The {GMAP}: a Versatile Tool for Physical Data Independence", journal = j-VLDB-J, volume = "5", number = "2", pages = "101--118", month = apr, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:38 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/i/Ioannidis:Yannis_E=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Solomon:Marvin_H=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tsatalos:Odysseas_G=.html; http://link.springer.de/link/service/journals/00778/bibs/6005002/60050101.htm; http://link.springer.de/link/service/journals/00778/papers/6005002/60050101.pdf; http://link.springer.de/link/service/journals/00778/papers/6005002/60050101.ps.gz", abstract = "Physical data independence is touted as a central feature of modern database systems. It allows users to frame queries in terms of the logical structure of the data, letting a query processor automatically translate them into optimal plans that access physical storage structures. Both relational and object-oriented systems, however, force users to frame their queries in terms of a logical schema that is directly tied to physical structures. We present an approach that eliminates this dependence. All storage structures are defined in a declarative language based on relational algebra as functions of a logical schema. We present an algorithm, integrated with a conventional query optimizer, that translates queries over this logical schema into plans that access the storage structures. 
We also show how to compile update requests into plans that update all relevant storage structures consistently and optimally. Finally, we report on experiments with a prototype implementation of our approach that demonstrate how it allows storage structures to be tuned to the expected or observed workload to achieve significantly better performance than is possible with conventional techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "indexing; materialized views; physical data independence; physical database design", } @Article{Poulovassilis:1996:AQO, author = "Alexandra Poulovassilis and Carol Small", title = "Algebraic Query Optimisation for Database Programming Languages", journal = j-VLDB-J, volume = "5", number = "2", pages = "119--132", month = apr, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:38 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Poulovassilis:Alexandra.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Small:Carol.html; http://link.springer.de/link/service/journals/00778/bibs/6005002/60050119.htm; http://link.springer.de/link/service/journals/00778/papers/6005002/60050119.pdf; http://link.springer.de/link/service/journals/00778/papers/6005002/60050119.ps.gz", abstract = "A major challenge still facing the designers and implementors of database programming languages (DBPLs) is that of query optimisation. 
We investigate algebraic query optimisation techniques for DBPLs in the context of a purely declarative functional language that supports sets as first-class objects. Since the language is computationally complete issues such as non-termination of expressions and construction of infinite data structures can be investigated, whilst its declarative nature allows the issue of side effects to be avoided and a richer set of equivalences to be developed. The language has a well-defined semantics which permits us to reason formally about the properties of expressions, such as their equivalence with other expressions and their termination. The support of a set bulk data type enables much prior work on the optimisation of relational languages to be utilised. In the paper we first give the syntax of our archetypal DBPL and briefly discuss its semantics. We then define a small but powerful algebra of operators over the set data type, provide some key equivalences for expressions in these operators, and list transformation principles for optimising expressions. Along the way, we identify some caveats to well-known equivalences for non-deductive database languages. We next extend our language with two higher level constructs commonly found in functional DBPLs: set comprehensions and functions with known inverses. Some key equivalences for these constructs are provided, as are transformation principles for expressions in them. Finally, we investigate extending our equivalences for the set operators to the analogous operators over bags. 
Although developed and formally proved in the context of a functional language, our findings are directly applicable to other DBPLs of similar expressiveness.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "algebraic manipulation; database management; database programming languages; functional languages; query optimisation", } @Article{Amiel:1996:TSR, author = "Eric Amiel and Marie-Jo Bellosta and Eric Dujardin and Eric Simon", title = "Type-safe Relaxing of Schema Consistency Rules for Flexible Modeling in {OODBMS}", journal = j-VLDB-J, volume = "5", number = "2", pages = "133--150", month = apr, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:38 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Amiel:Eric.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Bellosta:Marie=Jo.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Dujardin:Eric.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Simon:Eric.html; http://link.springer.de/link/service/journals/00778/bibs/6005002/60050133.htm; http://link.springer.de/link/service/journals/00778/papers/6005002/60050133.pdf; http://link.springer.de/link/service/journals/00778/papers/6005002/60050133.ps.gz", abstract = "Object-oriented databases enforce behavioral schema consistency rules to guarantee type safety, i.e., that no run-time type error can occur. When the schema must evolve, some schema updates may violate these rules. 
In order to maintain behavioral schema consistency, traditional solutions require significant changes to the types, the type hierarchy and the code of existing methods. Such operations are very expensive in a database context. To ease schema evolution, we propose to support exceptions to the behavioral consistency rules without sacrificing type safety. The basic idea is to detect unsafe statements in a method code at compile-time and check them at run-time. The run-time check is performed by a specific clause that is automatically inserted around unsafe statements. This check clause warns the programmer of the safety problem and lets him provide exception-handling code. Schema updates can therefore be performed with only minor changes to the code of methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "contravariance; covariance; object-oriented databases; schema evolution; type safety", xxtitle = "Type-safe relaxing of schema consistency rules for flexible modelling in {OODBMS}", } @Article{Fang:1996:EOB, author = "Doug Fang and Shahram Ghandeharizadeh and Dennis McLeod", title = "An experimental object-based sharing system for networked databases", journal = j-VLDB-J, volume = "5", number = "2", pages = "151--165", month = apr, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:38 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Fang:Doug.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Ghandeharizadeh:Shahram.html; 
http://link.springer.de/link/service/journals/00778/bibs/6005002/60050151.htm; http://link.springer.de/link/service/journals/00778/papers/6005002/60050151.pdf; http://link.springer.de/link/service/journals/00778/papers/6005002/60050151.ps.gz", abstract = "An approach and mechanism for the transparent sharing of objects in an environment of interconnected (networked), autonomous database systems is presented. An experimental prototype system has been designed and implemented, and an analysis of its performance conducted. Previous approaches to sharing in this environment typically rely on the use of a global, integrated conceptual database schema; users and applications must pose queries at this new level of abstraction to access remote information. By contrast, our approach provides a mechanism that allows users to import remote objects directly into their local database transparently; access to remote objects is virtually the same as access to local objects. The experimental prototype system that has been designed and implemented is based on the Iris and Omega object-based database management systems; this system supports the sharing of data and meta-data objects (information units) as well as units of behavior. 
The results of experiments conducted to evaluate the performance of our mechanism demonstrate the feasibility of database transparent object sharing in a federated environment, and provide insight into the performance overhead and tradeoffs involved.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database system interoperability; experimental prototype benchmarking; object sharing", xxtitle = "An Experimental System for Object-Based Sharing in Federated Databases", } @Article{Dey:1996:CTR, author = "Debabrata Dey and Terence M. Barron and Veda C. Storey", title = "A Complete Temporal Relational Algebra", journal = j-VLDB-J, volume = "5", number = "3", pages = "167--180", month = aug, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:39 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Barron:Terence_M=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Dey:Debabrata.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Storey:Veda_C=.html; http://link.springer.de/link/service/journals/00778/bibs/6005003/60050167.htm; http://link.springer.de/link/service/journals/00778/papers/6005003/60050167.pdf; http://link.springer.de/link/service/journals/00778/papers/6005003/60050167.ps.gz", abstract = "Various temporal extensions to the relational model have been proposed. All of these, however, deviate significantly from the original relational model. 
This paper presents a temporal extension of the relational algebra that is not significantly different from the original relational model, yet is at least as expressive as any of the previous approaches. This algebra employs multidimensional tuple time-stamping to capture the complete temporal behavior of data. The basic relational operations are redefined as consistent extensions of the existing operations in a manner that preserves the basic algebraic equivalences of the snapshot (i.e., conventional static) algebra. A new operation, namely {\em temporal projection}, is introduced. The complete update semantics are formally specified and aggregate functions are defined. The algebra is closed, and reduces to the snapshot algebra. It is also shown to be at least as expressive as the calculus-based temporal query language TQuel. In order to assess the algebra, it is evaluated using a set of twenty-six criteria proposed in the literature, and compared to existing temporal relational algebras. The proposed algebra appears to satisfy more criteria than any other existing algebra.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "historical databases; relational algebra; temporal databases; transaction time; valid time", remark = "Check month: July or August??", } @Article{Shyy:1996:DIK, author = "Yuh-Ming Shyy and Javier Arroyo and Stanley Y. W. 
Su and Herman Lam", title = "The Design and Implementation of {K}: a High-Level Knowledge-Base Programming Language of {OSAM*.KBMS}", journal = j-VLDB-J, volume = "5", number = "3", pages = "181--195", month = aug, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:39 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Arroyo:Javier.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lam:Herman.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shyy:Yuh=Ming.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Su:Stanley_Y=_W=.html; http://link.springer.de/link/service/journals/00778/bibs/6005003/60050181.htm; http://link.springer.de/link/service/journals/00778/papers/6005003/60050181.pdf; http://link.springer.de/link/service/journals/00778/papers/6005003/60050181.ps.gz", abstract = "The OSAM*.KBMS is a knowledge-base management system, or the so-called next-generation database management system, for non-traditional data/knowledge-intensive applications. In order to define, query, and manipulate a knowledge base, as well as to write codes to implement any application system, we have developed an object-oriented knowledge-base programming language called K to serve as the high-level interface of OSAM*.KBMS. 
This paper presents the design of K, its implementation, and its supporting KBMS developed at the Database Systems Research and Development Center of the University of Florida.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "abstractions; association patterns; knowledge-base programming language; object-oriented knowledge model; structural associations", remark = "Check month: July or August??", } @Article{Harder:1996:APS, author = "Theo H{\"a}rder and Joachim Reinert", title = "Access Path Support for Referential Integrity in {SQL2}", journal = j-VLDB-J, volume = "5", number = "3", pages = "196--214", month = aug, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:39 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/H=auml=rder:Theo.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Reinert:Joachim.html; http://link.springer.de/link/service/journals/00778/bibs/6005003/60050196.htm; http://link.springer.de/link/service/journals/00778/papers/6005003/60050196.pdf; http://link.springer.de/link/service/journals/00778/papers/6005003/60050196.ps.gz", abstract = "The relational model of data incorporates fundamental assertions for entity integrity and referential integrity. Recently, these so-called relational invariants were more precisely specified by the new SQL2 standard. Accordingly, they have to be guaranteed by a relational DBMS to its users and, therefore, all issues of semantics and implementation became very important. 
The specification of referential integrity embodies quite a number of complications including the MATCH clause and a collection of referential actions. In particular, $ \hbox {{\tt MATCH PARTIAL}} $ turns out to be hard to understand and, if applied, difficult and expensive to maintain. In this paper, we identify the functional requirements for preserving referential integrity. At a level free of implementational considerations, the number and kinds of searches necessary for referential integrity maintenance are derived. Based on these findings, our investigation is focused on the question of how the functional requirements can be supported by implementation concepts in an efficient way. We determine the search cost for referential integrity maintenance (in terms of page references) for various possible access path structures. Our main result is that a combined access path structure is the most appropriate for checking the regular MATCH option, whereas $ \hbox {{\tt MATCH PARTIAL}} $ requires very expensive and complicated check procedures. 
If it cannot be avoided at all, the best support is achieved by a combination of multiple $ \mbox {B}^*$-trees.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access path support; MATCH clause; referential integrity; relational databases; SQL2", remark = "Check month: July or August??", } @Article{Ooi:1996:INE, author = "Beng Chin Ooi and Jiawei Han and Hongjun Lu and Kian-Lee Tan", title = "Index Nesting --- An Efficient Approach to Indexing in Object-Oriented Databases", journal = j-VLDB-J, volume = "5", number = "3", pages = "215--228", month = aug, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:39 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Han:Jiawei.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lu:Hongjun.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/Ooi:Beng_Chin.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tan:Kian=Lee.html; http://link.springer.de/link/service/journals/00778/bibs/6005003/60050215.htm; http://link.springer.de/link/service/journals/00778/papers/6005003/60050215.pdf; http://link.springer.de/link/service/journals/00778/papers/6005003/60050215.ps.gz", abstract = "In object-oriented database systems where the concept of the superclass-subclass is supported, an instance of a subclass is also an instance of its superclass. 
Consequently, the access scope of a query against a class in general includes the access scope of all its subclasses, unless specified otherwise. An index to support superclass-subclass relationship efficiently must provide efficient associative retrievals of objects from a single class or from several classes in a class hierarchy. This paper presents an efficient index called the hierarchical tree (the H-tree). For each class, an H-tree is maintained, allowing efficient search on a single class. These H-trees are appropriately linked to capture the superclass-subclass relationships, thus allowing efficient retrievals of instances from a class hierarchy. Both experimental and analytical results indicate that the H-tree is an efficient indexing structure.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "indexing structures; OODB; query retrieval", remark = "Check month: July or August??", } @Article{Antoshenkov:1996:QPO, author = "Gennady Antoshenkov and Mohamed Ziauddin", title = "Query Processing and Optimization in {Oracle Rdb}", journal = j-VLDB-J, volume = "5", number = "4", pages = "229--237", month = dec, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:39 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Antoshenkov:Gennady.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Ziauddin:Mohamed.html; http://link.springer.de/link/service/journals/00778/bibs/6005004/60050229.htm; 
http://link.springer.de/link/service/journals/00778/papers/6005004/60050229.pdf; http://link.springer.de/link/service/journals/00778/papers/6005004/60050229.ps.gz", abstract = "This paper contains an overview of the technology used in the query processing and optimization component of Oracle Rdb, a relational database management system originally developed by Digital Equipment Corporation and now under development by Oracle Corporation. Oracle Rdb is a production system that supports the most demanding database applications, runs on multiple platforms and in a variety of environments.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "dynamic optimization; optimizer; query transformation; relational database; sampling", } @Article{Mylopoulos:1996:BKB, author = "John Mylopoulos and Vinay K. Chaudhri and Dimitris Plexousakis and Adel Shrufi and Thodoros Topaloglou", title = "Building Knowledge Base Management Systems", journal = j-VLDB-J, volume = "5", number = "4", pages = "238--263", month = dec, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:39 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chaudhri:Vinay_K=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mylopoulos:John.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Plexousakis:Dimitris.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shrufi:Adel.html; 
http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Topaloglou:Thodoros.html; http://link.springer.de/link/service/journals/00778/bibs/6005004/60050238.htm; http://link.springer.de/link/service/journals/00778/papers/6005004/60050238.pdf; http://link.springer.de/link/service/journals/00778/papers/6005004/60050238.ps.gz", abstract = "Advanced applications in fields such as CAD, software engineering, real-time process control, corporate repositories and digital libraries require the construction, efficient access and management of large, shared knowledge bases. Such knowledge bases cannot be built using existing tools such as expert system shells, because these do not scale up, nor can they be built in terms of existing database technology, because such technology does not support the rich representational structure and inference mechanisms required for knowledge-based systems. This paper proposes a generic architecture for a knowledge base management system intended for such applications. The architecture assumes an object-oriented knowledge representation language with an assertional sublanguage used to express constraints and rules. It also provides for general-purpose deductive inference and special-purpose temporal reasoning. Results reported in the paper address several knowledge base management issues. For storage management, a new method is proposed for generating a logical schema for a given knowledge base. Query processing algorithms are offered for semantic and physical query optimization, along with an enhanced cost model for query cost estimation. On concurrency control, the paper describes a novel concurrency control policy which takes advantage of knowledge base structure and is shown to outperform two-phase locking for highly structured knowledge bases and update-intensive transactions. Finally, algorithms for compilation and efficient processing of constraints and rules during knowledge base operations are described. 
The paper describes original results, including novel data structures and algorithms, as well as preliminary performance evaluation data. Based on these results, we conclude that knowledge base management systems which can accommodate large knowledge bases are feasible.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "concurrency control; constraint enforcement; knowledge base management systems; rule management; storage management", } @Article{Becker:1996:AOM, author = "Bruno Becker and Stephan Gschwind and Thomas Ohler and Bernhard Seeger and Peter Widmayer", title = "An Asymptotically Optimal Multiversion {B}-Tree", journal = j-VLDB-J, volume = "5", number = "4", pages = "264--275", month = dec, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:39 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Becker:Bruno.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Gschwind:Stephan.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/Ohler:Thomas.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Seeger:Bernhard.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Widmayer:Peter.html; http://link.springer.de/link/service/journals/00778/bibs/6005004/60050264.htm; http://link.springer.de/link/service/journals/00778/papers/6005004/60050264.pdf; http://link.springer.de/link/service/journals/00778/papers/6005004/60050264.ps.gz", abstract = "In a variety of applications, we need to keep track of 
the development of a data set over time. For maintaining and querying these multiversion data efficiently, external storage structures are an absolute necessity. We propose a multiversion B-tree that supports insertions and deletions of data items at the current version and range queries and exact match queries for any version, current or past. Our multiversion B-tree is asymptotically optimal in the sense that the time and space bounds are asymptotically the same as those of the (single-version) B-tree in the worst case. The technique we present for transforming a (single-version) B-tree into a multiversion B-tree is quite general: it applies to a number of hierarchical external access structures with certain properties directly, and it can be modified for others.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access methods; information systems; physical design; versioned data", } @Article{Kashyap:1996:SSS, author = "Vipul Kashyap and Amit P. 
Sheth", title = "Semantic and Schematic Similarities Between Database Objects: a Context-Based Approach", journal = j-VLDB-J, volume = "5", number = "4", pages = "276--304", month = dec, year = "1996", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:39 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html; http://link.springer.de/link/service/journals/00778/tocs/t6005004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kashyap:Vipul.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sheth:Amit_P=.html; http://link.springer.de/link/service/journals/00778/bibs/6005004/60050276.htm; http://link.springer.de/link/service/journals/00778/papers/6005004/60050276.pdf; http://link.springer.de/link/service/journals/00778/papers/6005004/60050276.ps.gz", abstract = "In a multidatabase system, schematic conflicts between two objects are usually of interest only when the objects have some semantic similarity. We use the concept of {\em semantic proximity}, which is essentially an {\em abstraction/mapping\/} between the domains of the two objects associated with the {\em context of comparison}. An explicit though partial context representation is proposed and the specificity relationship between contexts is defined. The contexts are organized as a meet semi-lattice and associated operations like the greatest lower bound are defined. The context of comparison and the type of abstractions used to relate the two objects form the basis of a semantic taxonomy. At the {\em semantic level}, the intensional description of database objects provided by the context is expressed using description logics. 
The terms used to construct the contexts are obtained from {\em domain-specific ontologies}. {\em Schema correspondences\/} are used to store mappings from the semantic level to the data level and are associated with the respective contexts. Inferences about database content at the federation level are modeled as changes in the context and the associated schema correspondences. We try to reconcile the dual (schematic and semantic) perspectives by enumerating {\em possible semantic similarities\/} between objects having schema and data conflicts, and modeling schema correspondences as the projection of semantic proximity {\em with respect to (wrt)\/} context.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Evangelidis:1997:HTM, author = "Georgios Evangelidis and David B. Lomet and Betty Salzberg", title = "The {hB} {$^{\Pi }$}{-tree}: a multi-attribute index supporting concurrency, recovery and node consolidation", journal = j-VLDB-J, volume = "6", number = "1", pages = "1--25", month = feb, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:40 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t7006001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/e/Evangelidis:Georgios.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lomet:David_B=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Salzberg:Betty.html; http://link.springer.de/link/service/journals/00778/bibs/7006001/70060001.htm; http://link.springer.de/link/service/journals/00778/papers/7006001/70060001.pdf; 
http://link.springer.de/link/service/journals/00778/papers/7006001/70060001.ps.gz", abstract = "We propose a new multi-attribute index. Our approach combines the hB-tree, a multi-attribute index, and the $ \Pi $-tree, an abstract index which offers efficient concurrency and recovery methods. We call the resulting method the hB $^\Pi $-tree. We describe several versions of the hB $^\Pi $-tree, each using a different node-splitting and index-term-posting algorithm. We also describe a new node deletion algorithm. We have implemented all the versions of the hB $^\Pi $-tree. Our performance results show that even the version that offers no performance guarantees, actually performs very well in terms of storage utilization, index size (fan-out), exact-match and range searching, under various data types and distributions. We have also shown that our index is fairly insensitive to increases in dimension. Thus, it is suitable for indexing high-dimensional applications. This property and the fact that all our versions of the hB $^\Pi $-tree can use the $ \Pi $-tree concurrency and recovery algorithms make the hB $^\Pi $-tree a promising candidate for inclusion in a general-purpose DBMS.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "concurrency; multi-attribute index; node consolidation; recovery", remark = "Check month: January or February??", } @Article{Antoshenkov:1997:DBO, author = "Gennady Antoshenkov", title = "Dictionary-based order-preserving string compression", journal = j-VLDB-J, volume = "6", number = "1", pages = "26--39", month = feb, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:40 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; 
http://link.springer.de/link/service/journals/00778/tocs/t7006001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Antoshenkov:Gennady.html; http://link.springer.de/link/service/journals/00778/bibs/7006001/70060026.htm; http://link.springer.de/link/service/journals/00778/papers/7006001/70060026.pdf; http://link.springer.de/link/service/journals/00778/papers/7006001/70060026.ps.gz", abstract = "As no database exists without indexes, no index implementation exists without order-preserving key compression, in particular, without prefix and tail compression. However, despite the great potentials of making indexes smaller and faster, application of general compression methods to ordered data sets has advanced very little. This paper demonstrates that the fast dictionary-based methods can be applied to order-preserving compression almost with the same freedom as in the general case. The proposed new technology has the same speed and a compression rate only marginally lower than the traditional order-indifferent dictionary encoding. Procedures for encoding and generating the encode tables are described covering such order-related features as ordered data set restrictions, sensitivity and insensitivity to a character position, and one-symbol encoding of each frequent trailing character sequence. 
The experimental results presented demonstrate five-folded compression on real-life data sets and twelve-folded compression on Wisconsin benchmark text fields.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "indexing; order-preserving key compression", remark = "Check month: January or February??", } @Article{Singhal:1997:ALB, author = "Vigyan Singhal and Alan Jay Smith", title = "Analysis of Locking Behavior in Three Real Database Systems", journal = j-VLDB-J, volume = "6", number = "1", pages = "40--52", month = feb, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:40 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t7006001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Singhal:Vigyan.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Smith:Alan_Jay.html; http://link.springer.de/link/service/journals/00778/bibs/7006001/70060040.htm; http://link.springer.de/link/service/journals/00778/papers/7006001/70060040.pdf; http://link.springer.de/link/service/journals/00778/papers/7006001/70060040.ps.gz", abstract = "Concurrency control is essential to the correct functioning of a database due to the need for correct, reproducible results. For this reason, and because concurrency control is a well-formulated problem, there has developed an enormous body of literature studying the performance of concurrency control algorithms. 
Most of this literature uses either analytic modeling or random number-driven simulation, and explicitly or implicitly makes certain assumptions about the behavior of transactions and the patterns by which they set and unset locks. Because of the difficulty of collecting suitable measurements, there have been only a few studies which use trace-driven simulation, and still less study directed toward the characterization of concurrency control behavior of real workloads. In this paper, we present a study of three database workloads, all taken from IBM DB2 relational database systems running commercial applications in a production environment. This study considers topics such as frequency of locking and unlocking, deadlock and blocking, duration of locks, types of locks, correlations between applications of lock types, two-phase versus non-two-phase locking, when locks are held and released, etc. In each case, we evaluate the behavior of the workload relative to the assumptions commonly made in the research literature and discuss the extent to which those assumptions may or may not lead to erroneous conclusions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "concurrency control; trace-driven simulation; workload characterization", remark = "Check month: January or February??", } @Article{Mehta:1997:DPS, author = "Manish Mehta and David J. 
DeWitt", title = "Data placement in shared-nothing parallel database systems (*)", journal = j-VLDB-J, volume = "6", number = "1", pages = "53--72", month = feb, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:40 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t7006001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/DeWitt:David_J=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mehta:Manish.html; http://link.springer.de/link/service/journals/00778/bibs/7006001/70060053.htm; http://link.springer.de/link/service/journals/00778/papers/7006001/70060053.pdf; http://link.springer.de/link/service/journals/00778/papers/7006001/70060053.ps.gz", abstract = "Data placement in shared-nothing database systems has been studied extensively in the past and various placement algorithms have been proposed. However, there is no consensus on the most efficient data placement algorithm and placement is still performed manually by a database administrator with periodic reorganization to correct mistakes. This paper presents the first comprehensive simulation study of data placement issues in a shared-nothing system. The results show that current hardware technology trends have significantly changed the performance tradeoffs considered in past studies. 
A simplistic data placement strategy based on the new results is developed and shown to perform well for a variety of workloads.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "declustering; disk allocation; resource allocation; resource scheduling", remark = "Check month: January or February??", } @Article{Papazoglou:1997:DMO, author = "Mike P. Papazoglou and Bernd J. Kr{\"a}mer", title = "A Database Model for Object Dynamics", journal = j-VLDB-J, volume = "6", number = "2", pages = "73--96", month = aug, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:41 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t7006002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition. See erratum \cite{Papazoglou:1997:EDM}.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kr=auml=mer:Bernd_J=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Papazoglou:Mike_P=.html; http://link.springer.de/link/service/journals/00778/bibs/7006002/70060073.htm; http://link.springer.de/link/service/journals/00778/papers/7006002/70060073.pdf; http://link.springer.de/link/service/journals/00778/papers/7006002/70060073.ps.gz", abstract = "To effectively model complex applications in which constantly changing situations can be represented, a database system must be able to support the runtime specification of structural and behavioral nuances for objects on an individual or group basis. 
This paper introduces the role mechanism as an extension of object-oriented databases to support unanticipated behavioral oscillations for objects that may attain many types and share a single object identity. A role refers to the ability to represent object dynamics by seamlessly integrating idiosyncratic behavior, possibly in response to external events, with pre-existing object behavior specified at instance creation time. In this manner, the same object can simultaneously be an instance of different classes which symbolize the different roles that this object assumes. The role concept and its underlying linguistic scheme simplify the design requirements of complex applications that need to create and manipulate dynamic objects.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "dynamic class hierarchy; dynamic object re-classification; object migration; object role model; object-oriented database systems", remark = "Check month: May or August??", } @Article{Catarci:1997:GIH, author = "Tiziana Catarci and Giuseppe Santucci and John Cardiff", title = "Graphical interaction with heterogeneous databases (*)", journal = j-VLDB-J, volume = "6", number = "2", pages = "97--120", month = aug, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:41 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t7006002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Cardiff:John.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Catarci:Tiziana.html; 
http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Santucci:Giuseppe.html; http://link.springer.de/link/service/journals/00778/bibs/7006002/70060097.htm; http://link.springer.de/link/service/journals/00778/papers/7006002/70060097.pdf; http://link.springer.de/link/service/journals/00778/papers/7006002/70060097.ps.gz", abstract = "During the past few years our research efforts have been inspired by two different needs. On one hand, the number of non-expert users accessing databases is growing apace. On the other, information systems will no longer be characterized by a single centralized architecture, but rather by several heterogeneous component systems. In order to address such needs we have designed a new query system with both user-oriented and multidatabase features. The system's main components are an adaptive visual interface, providing the user with different and interchangeable interaction modalities, and a ``translation layer'', which creates and offers to the user the illusion of a single homogeneous schema out of several heterogeneous components. Both components are founded on a common ground, i.e. a formally defined and semantically rich data model, the Graph Model, and a minimal set of Graphical Primitives, in terms of which general query operations may be visually expressed. The Graph Model has a visual syntax, so that graphical operations can be applied on its components without unnecessary mappings, and an object-based semantics. The aim of this paper is twofold. We first present an overall view of the system architecture and then give a comprehensive description of the lower part of the system itself. In particular, we show how schemata expressed in different data models can be translated in terms of Graph Model, possibly by exploiting reverse engineering techniques. Moreover, we show how mappings can be established between well-known query languages and the Graphical Primitives. 
Finally, we describe in detail how queries expressed by using the Graphical Primitives can be translated in terms of relational expressions so to be processed by actual DBMSs.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", remark = "Check month: May or August??", } @Article{Chen:1997:AHF, author = "Ming-Syan Chen and Hui-I Hsiao and Philip S. Yu", title = "On Applying Hash Filters to Improving the Execution of Multi-Join Queries", journal = j-VLDB-J, volume = "6", number = "2", pages = "121--131", month = aug, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:41 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t7006002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chen:Ming=Syan.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hsiao:Hui=I.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yu:Philip_S=.html; http://link.springer.de/link/service/journals/00778/bibs/7006002/70060121.htm; http://link.springer.de/link/service/journals/00778/papers/7006002/70060121.pdf; http://link.springer.de/link/service/journals/00778/papers/7006002/70060121.ps.gz", abstract = "In this paper, we explore an approach of interleaving a bushy execution tree with hash filters to improve the execution of multi-join queries. Similar to semi-joins in distributed query processing, hash filters can be applied to eliminate non-matching tuples from joining relations before the execution of a join, thus reducing the join cost. 
Note that hash filters built in different execution stages of a bushy tree can have different costs and effects. The effect of hash filters is evaluated first. Then, an efficient scheme to determine an effective sequence of hash filters for a bushy execution tree is developed, where hash filters are built and applied based on the join sequence specified in the bushy tree so that not only is the reduction effect optimized but also the cost associated is minimized. Various schemes using hash filters are implemented and evaluated via simulation. It is experimentally shown that the application of hash filters is in general a very powerful means to improve the execution of multi-join queries, and the improvement becomes more prominent as the number of relations in a query increases.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "bushy trees; hash filters; parallel query processing; sort-merge joins", remark = "Check month: May or August??", } @Article{Ioannidis:1997:PQO, author = "Yannis E. Ioannidis and Raymond T. Ng and Kyuseok Shim and Timos K. 
Sellis", title = "Parametric Query Optimization", journal = j-VLDB-J, volume = "6", number = "2", pages = "132--151", month = aug, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:41 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t7006002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/i/Ioannidis:Yannis_E=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Ng:Raymond_T=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sellis:Timos_K=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shim:Kyuseok.html; http://link.springer.de/link/service/journals/00778/bibs/7006002/70060132.htm; http://link.springer.de/link/service/journals/00778/papers/7006002/70060132.pdf; http://link.springer.de/link/service/journals/00778/papers/7006002/70060132.ps.gz", abstract = "In most database systems, the values of many important run-time parameters of the system, the data, or the query are unknown at query optimization time. Parametric query optimization attempts to identify at compile time several execution plans, each one of which is optimal for a subset of all possible values of the run-time parameters. The goal is that at run time, when the actual parameter values are known, the appropriate plan should be identifiable with essentially no overhead. We present a general formulation of this problem and study it primarily for the buffer size parameter. We adopt randomized algorithms as the main approach to this style of optimization and enhance them with a {\em sideways information passing\/} feature that increases their effectiveness in the new task. 
Experimental results of these enhanced algorithms show that they optimize queries for large numbers of buffer sizes in the same time needed by their conventional versions for a single buffer size, without much sacrifice in the output quality and with essentially zero run-time overhead.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", remark = "Check month: May or August??", } @Article{Mehrotra:1997:CCH, author = "Sharad Mehrotra and Henry F. Korth and Avi Silberschatz", title = "Concurrency Control in Hierarchical Multidatabase Systems", journal = j-VLDB-J, volume = "6", number = "2", pages = "152--172", month = aug, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:41 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t7006002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Korth:Henry_F=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mehrotra:Sharad.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Silberschatz:Abraham.html; http://link.springer.de/link/service/journals/00778/bibs/7006002/70060152.htm; http://link.springer.de/link/service/journals/00778/papers/7006002/70060152.pdf; http://link.springer.de/link/service/journals/00778/papers/7006002/70060152.ps.gz", abstract = "Over the past decade, significant research has been done towards developing transaction management algorithms for multidatabase systems. 
Most of this work assumes a monolithic architecture of the multidatabase system with a single software module that follows a single transaction management algorithm to ensure the consistency of data stored in the local databases. This monolithic architecture is not appropriate in a multidatabase environment where the system spans multiple different organizations that are distributed over various geographically distant locations. In this paper, we propose an alternative multidatabase transaction management architecture, where the system is hierarchical in nature. Hierarchical architecture has consequences on the design of transaction management algorithms. An implication of the architecture is that the transaction management algorithms followed by a multidatabase system must be {\em composable\/} --- that is, it must be possible to incorporate individual multidatabase systems as elements in a larger multidatabase system. We present a hierarchical architecture for a multidatabase environment and develop techniques for concurrency control in such systems.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "concurrency control; database management; distributed databases; multidatabase management", remark = "Check month: May or August??", xxauthor = "Sharad Mehrotra and Henry F. Korth and Abraham Silberschatz", } @Article{Cobb:1997:IOT, author = "Edward E. 
Cobb", title = "The impact of object technology on commercial transaction processing", journal = j-VLDB-J, volume = "6", number = "3", pages = "173--190", month = aug, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:42 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Businesses today are searching for information solutions that enable them to compete in the global marketplace. To minimize risk, these solutions must build on existing investments, permit the best technology to be applied to the problem, and be manageable. Object technology, with its promise of improved productivity and quality in application development, delivers these characteristics but, to date, its deployment in commercial business applications has been limited. One possible reason is the absence of the transaction paradigm, widely used in commercial environments and essential for reliable business applications. For object technology to be a serious contender in the construction of these solutions requires: --- technology for transactional objects. In December 1994, the Object Management Group adopted a specification for an object {\em transaction service\/} (OTS). The OTS specifies mechanisms for defining and manipulating transactions. Though derived from the X/Open distributed transaction processing model, OTS contains additional enhancements specifically designed for the object environment. Similar technology from Microsoft appeared at the end of 1995. --- methodologies for building new business systems from existing parts. Business process re-engineering is forcing businesses to improve their operations which bring products to market. {\em Workflow computing}, when used in conjunction with {\em ``object wrappers''\/} provides tools to both define and track execution of business processes which leverage existing applications and infrastructure. 
--- an execution environment which satisfies the requirements of the operational needs of the business. Transaction processing (TP) monitor technology, though widely accepted for mainframe transaction processing, has yet to enjoy similar success in the client/server marketplace. Instead the database vendors, with their extensive tool suites, dominate. As object brokers mature they will require many of the functions of today's TP monitors. Marrying these two technologies can produce a robust execution environment which offers a superior alternative for building and deploying client/server applications.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "objects; transaction processing; workflow", } @Article{Cobb:1997:ITC, author = "Edward E. Cobb", title = "The Impact of Technology on Commercial Transaction Processing", journal = j-VLDB-J, volume = "6", number = "3", pages = "173--190", month = aug, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 27 10:11:57 MDT 2000", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t0006003.htm; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Cobb:Edward_E=.html; http://link.springer.de/link/service/journals/00778/bibs/7006003/70060173.htm", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", remark = "Check month: May or August??", xxtitle = "The impact of object technology on commercial transaction processing", } @Article{Steinbrunn:1997:HRO, author = "Michael Steinbrunn and Guido Moerkotte 
and Alfons Kemper", title = "Heuristic and Randomized Optimization for the Join Ordering Problem", journal = j-VLDB-J, volume = "6", number = "3", pages = "191--208", month = aug, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:42 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t0006003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kemper:Alfons.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Moerkotte:Guido.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Steinbrunn:Michael.html; http://link.springer.de/link/service/journals/00778/bibs/7006003/70060191.htm", abstract = "Recent developments in database technology, such as deductive database systems, have given rise to the demand for new, cost-effective optimization techniques for join expressions. In this paper many different algorithms that compute approximate solutions for optimizing join orders are studied since traditional dynamic programming techniques are not appropriate for complex problems. Two possible solution spaces, the space of left-deep and bushy processing trees, are evaluated from a statistical point of view. The result is that the common limitation to left-deep processing trees is only advisable for certain join graph types. Basically, optimizers from three classes are analysed: heuristic, randomized and genetic algorithms. Each one is extensively scrutinized with respect to its working principle and its fitness for the desired application. It turns out that randomized and genetic algorithms are well suited for optimizing join expressions. They generate solutions of high quality within a reasonable running time. 
The benefits of heuristic optimizers, namely the short running time, are often outweighed by merely moderate optimization performance.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "genetic algorithms; heuristic algorithms; join ordering; query optimization; randomized algorithms", remark = "Check month: May or August??", } @Article{Panagos:1997:SRC, author = "Euthimios Panagos and Alexandros Biliris", title = "Synchronization and Recovery in a Client-Server Storage System", journal = j-VLDB-J, volume = "6", number = "3", pages = "209--223", month = aug, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:42 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t0006003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Biliris:Alexandros.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Panagos:Euthimios.html; http://link.springer.de/link/service/journals/00778/bibs/7006003/70060209.htm", abstract = "Client-server object-oriented database management systems differ significantly from traditional centralized systems in terms of their architecture and the applications they target. In this paper, we present the client-server architecture of the EOS storage manager and we describe the concurrency control and recovery mechanisms it employs. EOS offers a semi-optimistic locking scheme based on the multi-granularity two-version two-phase locking protocol. Under this scheme, multiple concurrent readers are allowed to access a data item while it is being updated by a single writer. 
Recovery is based on write-ahead redo-only logging. Log records are generated at the clients and they are shipped to the server during normal execution and at transaction commit. Transaction rollback is fast because there are no updates that have to be undone, and recovery from system crashes requires only one scan of the log for installing the changes made by transactions that committed before the crash. We also present a preliminary performance evaluation of the implementation of the above mechanisms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "checkpoint; client-server architecture; concurrency control; locking; logging; object management; recovery; transaction management", remark = "Check month: May or August??", } @Article{Lomet:1997:CRI, author = "David B. Lomet and Betty Salzberg", title = "Concurrency and Recovery for Index Trees", journal = j-VLDB-J, volume = "6", number = "3", pages = "224--240", month = aug, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:42 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t0006003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lomet:David_B=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Salzberg:Betty.html; http://link.springer.de/link/service/journals/00778/bibs/7006003/70060224.htm", abstract = "Although many suggestions have been made for concurrency in B$^+$-trees, few of these have considered recovery as well. We describe an approach which provides high concurrency while preserving well-formed trees across system crashes. 
Our approach works for a class of index trees that is a generalization of the B$^{\rm link}$-tree. This class includes some multi-attribute indexes and temporal indexes. Structural changes in an index tree are decomposed into a sequence of atomic actions, each one leaving the tree well-formed and each working on a separate level of the tree. All atomic actions on levels of the tree above the leaf level are independent of database transactions, and so are of short duration. Incomplete structural changes are detected in normal operations and trigger completion.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access methods; B-trees; concurrency; indexing; recovery", remark = "Check month: May or August??", } @Article{Haas:1997:STA, author = "Laura M. Haas and Michael J. Carey and Miron Livny and Amit Shukla", title = "Seeking the truth about {\em ad hoc\/} join costs", journal = j-VLDB-J, volume = "6", number = "3", pages = "241--256", month = aug, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:42 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t0006003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Carey:Michael_J=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Haas:Laura_M=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Livny:Miron.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shukla:Amit.html; http://link.springer.de/link/service/journals/00778/bibs/7006003/70060241.htm", abstract = "In this paper, we re-examine the results of prior work 
on methods for computing {\em ad hoc\/} joins. We develop a detailed cost model for predicting join algorithm performance, and we use the model to develop cost formulas for the major {\em ad hoc\/} join methods found in the relational database literature. We show that various pieces of ``common wisdom'' about join algorithm performance fail to hold up when analyzed carefully, and we use our detailed cost model to derive optimal buffer allocation schemes for each of the join methods examined here. We show that optimizing their buffer allocations can lead to large performance improvements, e.g., as much as a 400\% improvement in some cases. We also validate our cost model's predictions by measuring an actual implementation of each join algorithm considered. The results of this work should be directly useful to implementors of relational query optimizers and query processing systems.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "buffer allocation; cost models; join methods; optimization; performance", remark = "Check month: May or August??", } @Article{Papazoglou:1997:EDM, author = "Mike P. Papazoglou and Bernd J. Kr{\"a}mer", title = "Erratum --- {A} database model for object dynamics", journal = j-VLDB-J, volume = "6", number = "3", pages = "257--260", month = aug, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:42 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t0006003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition. 
See \cite{Papazoglou:1997:DMO}.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kr=auml=mer:Bernd_J=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Papazoglou:Mike_P=.html; http://link.springer.de/link/service/journals/00778/bibs/7006003/70060257.htm", abstract = "Due to a technical error, some figures of the above paper were not reproduced satisfactorily. They are printed again below.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", remark = "Check month: May or August??", } @Article{Fahl:1997:QPO, author = "Gustav Fahl and Tore Risch", title = "Query Processing Over Object Views of Relational Data", journal = j-VLDB-J, volume = "6", number = "4", pages = "261--281", month = nov, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:44 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t7006004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Fahl:Gustav.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Risch:Tore.html; http://link.springer.de/link/service/journals/00778/bibs/7006004/70060261.htm; http://link.springer.de/link/service/journals/00778/papers/7006004/70060261.pdf", abstract = "This paper presents an approach to {\em object view\/} management for relational databases. Such a view mechanism makes it possible for users to transparently work with data in a relational database as if it was stored in an object-oriented (OO) database. A query against the object view is translated to one or several queries against the relational database. 
The results of these queries are then processed to form an answer to the initial query. The approach is not restricted to a `pure' object view mechanism for the relational data, since the object view can also store its own data and methods. Therefore it must be possible to process queries that combine local data residing in the object view with data retrieved from the relational database. We discuss the key issues when object views of relational databases are developed, namely: how to map relational structures to sub-type/supertype hierarchies in the view, how to represent relational database access in OO query plans, how to provide the concept of object identity in the view, how to handle the fact that the extension of types in the view depends on the state of the relational database, and how to process and optimize queries against the object view. The results are based on experiences from a running prototype implementation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "object views; object-oriented federated databases; query optimization; query processing; relational databases", } @Article{Diaz:1997:EEA, author = "Oscar D{\'\i}az and Arturo Jaime", title = "{EXACT}: An Extensible Approach to Active Object-Oriented Databases", journal = j-VLDB-J, volume = "6", number = "4", pages = "282--295", month = nov, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:44 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t7006004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/D=iacute=az:Oscar.html; 
http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jaime:Arturo.html; http://link.springer.de/link/service/journals/00778/bibs/7006004/70060282.htm; http://link.springer.de/link/service/journals/00778/papers/7006004/70060282.pdf", abstract = "Active database management systems (DBMSs) are a fast-growing area of research, mainly due to the large number of applications which can benefit from this active dimension. These applications are far from being homogeneous, requiring different kinds of functionalities. However, most of the active DBMSs described in the literature only provide a {\em fixed, hard-wired\/} execution model to support the active dimension. In object-oriented DBMSs, event-condition-action rules have been proposed for providing active behaviour. This paper presents EXACT, a rule manager for object-oriented DBMSs which provides a variety of options from which the designer can choose the one that best fits the semantics of the concept to be supported by rules. Due to the difficulty of foreseeing future requirements, special attention has been paid to making rule management easily extensible, so that the user can tailor it to suit specific applications. This has been borne out by an implementation in ADAM, an object-oriented DBMS. An example is shown of how the default mechanism can be easily extended to support new requirements.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "active DBMS; extensibility; metaclasses; object-oriented DBMS", } @Article{Bohm:1997:SDS, author = "Klemens B{\"o}hm and Karl Aberer and Erich J. 
Neuhold and Xiaoya Yang", title = "Structured Document Storage and Refined Declarative and Navigational Access Mechanisms in {HyperStorM}", journal = j-VLDB-J, volume = "6", number = "4", pages = "296--311", month = nov, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:44 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t7006004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Aberer:Karl.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/B=ouml=hm:Klemens.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Neuhold:Erich_J=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yang:Xiaoya.html; http://link.springer.de/link/service/journals/00778/bibs/7006004/70060296.htm; http://link.springer.de/link/service/journals/00778/papers/7006004/70060296.pdf", abstract = "The combination of SGML and database technology allows to refine both declarative and navigational access mechanisms for structured document collection: with regard to declarative access, the user can formulate complex information needs without knowing a query language, the respective document type definition (DTD) or the underlying modelling. Navigational access is eased by hyperlink-rendition mechanisms going beyond plain link-integrity checking. With our approach, the database-internal representation of documents is configurable. It allows for an efficient implementation of operations, because DTD knowledge is not needed for document structure recognition. 
We show how the number of method invocations and the cost of parsing can be significantly reduced.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "document query languages; navigation; OODBMSs; SGML",
}

@Article{Muck:1997:CTH,
  author =       "Thomas A. M{\"u}ck and Martin L. Polaschek",
  title =        "A Configurable Type Hierarchy Index for {OODB}",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "4",
  pages =        "312--332",
  month =        nov,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 27 08:46:02 MDT 2000",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/M=uuml=ck:Thomas_A=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Polaschek:Martin_L=.html",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  remark =       "Duplicate record: this is the same article as entry Mueck:1997:CTH (identical journal, volume, number, and pages; the first author's name is spelled Mueck there). That entry also carries the Springer URLs, the abstract, and the keywords. This key is retained only so that existing citations keep resolving; do not cite both entries in one document.",
}

@Article{Mueck:1997:CTH, author = "Thomas A. Mueck and Martin L.
Polaschek", title = "A configurable type hierarchy index for {OODB}", journal = j-VLDB-J, volume = "6", number = "4", pages = "312--332", month = nov, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:44 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t7006004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/7006004/70060312.htm; http://link.springer.de/link/service/journals/00778/papers/7006004/70060312.pdf", abstract = "With respect to the specific requirements of advanced OODB applications, index data structures for type hierarchies in OODBMS have to provide efficient support for multiattribute queries and have to allow index optimization for a particular query profile. We describe the {\em multikey type index\/} and an efficient implementation of this indexing scheme. It meets both requirements: in addition to its multiattribute query capabilities it is designed as a mediator between two standard design alternatives, key-grouping and type-grouping. A prerequisite for the multikey type index is a linearization algorithm which maps type hierarchies to linearly ordered attribute domains in such a way that each subhierarchy is represented by an interval of this domain. The algorithm extends previous results with respect to multiple inheritance. The subsequent evaluation of our proposal focuses on storage space overhead as well as on the number of disk I/O operations needed for query execution. The analytical results for the multikey type index are compared to previously published figures for well-known single-key search structures. 
The comparison clearly shows the superiority of the multikey type index for a large class of query profiles.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access methods; indexing; multiple inheritance; OODB; type hierarchies", } @Article{Berchtold:1997:UEF, author = "Stefan Berchtold and Daniel A. Keim and Hans-Peter Kriegel", title = "Using Extended Feature Objects for Partial Similarity Retrieval", journal = j-VLDB-J, volume = "6", number = "4", pages = "333--348", month = nov, year = "1997", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:44 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html; http://link.springer.de/link/service/journals/00778/tocs/t7006004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Berchtold:Stefan.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Keim:Daniel_A=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kriegel:Hans=Peter.html; http://link.springer.de/link/service/journals/00778/bibs/7006004/70060333.htm; http://link.springer.de/link/service/journals/00778/papers/7006004/70060333.pdf", abstract = "In this paper, we introduce the concept of extended feature objects for similarity retrieval. Conventional approaches for similarity search in databases map each object in the database to a point in some high-dimensional feature space and define similarity as some distance measure in this space. For many similarity search problems, this feature-based approach is not sufficient. 
When retrieving partially similar polygons, for example, the search cannot be restricted to edge sequences, since similar polygon sections may start and end anywhere on the edges of the polygons. In general, inherently continuous problems such as the partial similarity search cannot be solved by using point objects in feature space. In our solution, we therefore introduce extended feature objects consisting of an infinite set of feature points. For an efficient storage and retrieval of the extended feature objects, we determine the minimal bounding boxes of the feature objects in multidimensional space and store these boxes using a spatial access structure. In our concrete polygon problem, sets of polygon sections are mapped to 2D feature objects in high-dimensional space which are then approximated by minimal bounding boxes and stored in an R$^*$-tree. The selectivity of the index is improved by using an adaptive decomposition of very large feature objects and a dynamic joining of small feature objects. For the polygon problem, translation, rotation, and scaling invariance is achieved by using the Fourier-transformed curvature of the normalized polygon sections. In contrast to vertex-based algorithms, our algorithm guarantees that no false dismissals may occur and additionally provides fast search times for realistic database sizes. 
We evaluate our method using real polygon data of a supplier for the car manufacturing industry.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "CAD databases; Fourier transformation; indexing and query processing of spatial objects; partial similarity retrieval", } @Article{Han:1998:ORQ, author = "Jia Liang Han", title = "Optimizing Relational Queries in Connection Hypergraphs: Nested Queries, Views, and Binding Propagations", journal = j-VLDB-J, volume = "7", number = "1", pages = "1--11", month = feb, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:45 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Han:Jia_Liang.html; http://link.springer.de/link/service/journals/00778/bibs/8007001/80070001.htm; http://link.springer.de/link/service/journals/00778/papers/8007001/80070001.pdf", abstract = "We optimize relational queries using connection hypergraphs (CHGs). All operations including value-passing between SQL blocks can be set-oriented. By introducing partial evaluations, reordering operations can be achieved for nested queries. For a query using views, we merge CHGs for the views and the query into one CHG and then apply query optimization. Furthermore, we may simulate magic sets methods elegantly in a CHG. Sideways information-passing strategies (SIPS) in a CHG amount to partial evaluations of SIPS paths. We introduce the maximum SIPS strategy, which performs SIPS for all bindings and all SIPS paths for a query. 
The new method has several advantages. First, the maximum SIPS strategy can be more efficient than the previous SIPS based on simple heuristics. Second, it is conceptually simple and easy to implement. Third, the processing strategies may be incorporated with the search space for query execution plans, which is a proven optimization strategy introduced by System R. Fourth, it provides a general framework of query optimization and may potentially be used to optimize next-generation database systems.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "connection hypergraphs; partial evaluations; relational query optimization; search space; SIPS", } @Article{Hanson:1998:FRC, author = "Eric N. Hanson and I.-Cheng Chen and Roxana Dastur and Kurt Engel and Vijay Ramaswamy and Wendy Tan and Chun Xu", title = "A Flexible and Recoverable Client\slash Server Database Event Notification System", journal = j-VLDB-J, volume = "7", number = "1", pages = "12--24", month = feb, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:45 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chen:I==Cheng.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Dastur:Roxana.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/e/Engel:Kurt.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hanson:Eric_N=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramaswamy:Vijay.html; 
http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tan:Wendy.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/x/Xu:Chun.html; http://link.springer.de/link/service/journals/00778/bibs/8007001/80070012.htm; http://link.springer.de/link/service/journals/00778/papers/8007001/80070012.pdf", abstract = "A software architecture is presented that allows client application programs to interact with a DBMS server in a flexible and powerful way, using either direct, volatile messages, or messages sent via recoverable queues. Normal requests from clients to the server and replies from the server to clients can be transmitted using direct or recoverable messages. In addition, an application event notification mechanism is provided, whereby client applications running anywhere on the network can register for events, and when those events are raised, the clients are notified. A novel parameter passing mechanism allows a set of tuples to be included in an event notification. The event mechanism is particularly useful in an active DBMS, where events can be raised by triggers to signal running application programs.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mehta:1998:OPM, author = "Ashish Mehta and James Geller and Yehoshua Perl and Erich J. 
Neuhold", title = "The {OODB} Path-Method Generator ({PMG}) Using Access Weights and Precomputed Access Relevance", journal = j-VLDB-J, volume = "7", number = "1", pages = "25--47", month = feb, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:45 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Geller:James.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mehta:Ashish.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Neuhold:Erich_J=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Perl:Yehoshua.html; http://link.springer.de/link/service/journals/00778/bibs/8007001/80070025.htm; http://link.springer.de/link/service/journals/00778/papers/8007001/80070025.pdf", abstract = "A {\em path-method\/} is used as a mechanism in object-oriented databases (OODBs) to retrieve or to update information relevant to one class that is not stored with that class but with some other class. A path-method is a method which traverses from one class through a chain of connections between classes and accesses information at another class. However, it is a difficult task for a casual user or even an application programmer to write path-methods to facilitate queries. This is because it might require comprehensive knowledge of many classes of the conceptual schema that are not directly involved in the query, and therefore may not even be included in a user's (incomplete) view about the contents of the database. 
We have developed a system, called {\em path-method generator\/} (PMG), which generates path-methods automatically according to a user's database-manipulating requests. The PMG offers the user one of the possible path-methods and the user verifies from his knowledge of the intended purpose of the request whether that path-method is the desired one. If the path method is rejected, then the user can utilize his now increased knowledge about the database to request (with additional parameters given) another offer from the PMG. The PMG is based on {\em access weights\/} attached to the connections between classes and precomputed {\em access relevance\/} between every pair of classes of the OODB. Specific rules for access weight assignment and algorithms for computing access relevance appeared in our previous papers [MGPF92, MGPF93, MGPF96]. In this paper, we present a variety of traversal algorithms based on access weights and precomputed access relevance. Experiments identify some of these algorithms as very successful in generating most desired path-methods. 
The PMG system utilizes these successful algorithms and is thus an efficient tool for aiding the user with the difficult task of querying and updating a large OODB.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access relevance; access weight; object-oriented databases; OODB queries; path-method; traversal algorithms", } @Article{Scheuermann:1998:DPL, author = "Peter Scheuermann and Gerhard Weikum and Peter Zabback", title = "Data Partitioning and Load Balancing in Parallel Disk Systems", journal = j-VLDB-J, volume = "7", number = "1", pages = "48--66", month = feb, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:45 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Scheuermann:Peter.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Weikum:Gerhard.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zabback:Peter.html; http://link.springer.de/link/service/journals/00778/bibs/8007001/80070048.htm; http://link.springer.de/link/service/journals/00778/papers/8007001/80070048.pdf", abstract = "Parallel disk systems provide opportunities for exploiting I/O parallelism in two possible ways, namely via inter-request and intra-request parallelism. In this paper, we discuss the main issues in performance tuning of such systems, namely striping and load balancing, and show their relationship to response time and throughput. 
We outline the main components of an intelligent, self-reliant file system that aims to optimize striping by taking into account the requirements of the applications, and performs load balancing by judicious file allocation and dynamic redistributions of the data when access patterns change. Our system uses simple but effective heuristics that incur only little overhead. We present performance experiments based on synthetic workloads and real-life traces.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data allocation; disk cooling; file striping; load balancing; parallel disk systems; performance tuning", } @Article{Ishakbeyoglu:1998:MII, author = "Naci S. Ishakbeyo{\u{g}}lu and Z. Meral {\"O}zsoyo{\u{g}}lu", title = "Maintenance of Implication Integrity Constraints Under Updates to Constraints", journal = j-VLDB-J, volume = "7", number = "2", pages = "67--78", month = may, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:45 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/=/=Ouml=zsoyoglu:Z=_Meral.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/i/Ishakbeyoglu:Naci.html; http://link.springer.de/link/service/journals/00778/bibs/8007002/80070067.htm; http://link.springer.de/link/service/journals/00778/papers/8007002/80070067.pdf", abstract = "Semantic integrity constraints are used for enforcing the integrity of the database as well as for improving the efficiency of the database utilization. 
Although semantic integrity constraints are usually much more static as compared to the data itself, changes in the data semantics may necessitate corresponding changes in the constraint base. In this paper we address the problems related with maintaining a consistent and non-redundant set of constraints satisfied by the database in the case of updates to the constraint base. We consider implication constraints as semantic integrity constraints. The constraints are represented as conjunctions of inequalities. We present a methodology to determine whether a constraint is redundant or contradictory with respect to a set of constraints. The methodology is based on the partitioning of the constraint base which improves the efficiency of algorithms that check whether a constraint is redundant or contradictory with respect to a constraint base.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "implication integrity constraints; integrity constraints; partitioning; redundancy; satisfiability", } @Article{Dessloch:1998:ADP, author = "Stefan De{\ss}loch and Theo H{\"a}rder and Nelson Mendon{\c{c}}a Mattos and Bernhard Mitschang and Joachim Thomas", title = "Advanced Data Processing in {KRISYS}: Modeling Concepts, Implementation Techniques, and Client\slash Server Issues", journal = j-VLDB-J, volume = "7", number = "2", pages = "79--95", month = may, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:45 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = 
"http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/De=szlig=loch:Stefan.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/H=auml=rder:Theo.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mattos:Nelson_Mendon=ccedil=a.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mitschang:Bernhard.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Thomas:Joachim.html; http://link.springer.de/link/service/journals/00778/bibs/8007002/80070079.htm; http://link.springer.de/link/service/journals/00778/papers/8007002/80070079.pdf", abstract = "The increasing power of modern computers is steadily opening up new application domains for advanced data processing such as engineering and knowledge-based applications. To meet their requirements, concepts for advanced data management have been investigated during the last decade, especially in the field of object orientation. Over the last couple of years, the database group at the University of Kaiserslautern has been developing such an advanced database system, the KRISYS prototype. In this article, we report on the results and experiences obtained in the course of this project. The primary objective for the first version of KRISYS was to provide semantic features, such as an expressive data model, a set-oriented query language, deductive as well as active capabilities. The first KRISYS prototype became completely operational in 1989. To evaluate its features and to stabilize its functionality, we started to develop several applications with the system. These experiences marked the starting point for an overall redesign of KRISYS. Major goals were to tune KRISYS and its query-processing facilities to a suitable client/server environment, as well as to provide elaborate mechanisms for consistency control comprising semantic integrity constraints, multi-user synchronization, and failure recovery. 
The essential aspects of the resulting client/server architecture are embodied by the client-side data management needed to effectively support advanced applications and to gain the required system performance for interactive work. The project stages of KRISYS properly reflect the essential developments that have taken place in the research on advanced database systems over the last years. Hence, the subsequent discussions will bring up a number of important aspects with regard to advanced data processing that are of significant general importance, as well as of general applicability to database systems.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "client\slash server architectures; consistency control; object-oriented modeling concepts; query processing; run-time optimization", } @Article{Abiteboul:1998:LVS, author = "Serge Abiteboul and Sophie Cluet and Tova Milo", title = "A Logical View of Structured Files", journal = j-VLDB-J, volume = "7", number = "2", pages = "96--114", month = may, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:45 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Abiteboul:Serge.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Cluet:Sophie.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Milo:Tova.html; http://link.springer.de/link/service/journals/00778/bibs/8007002/80070096.htm; http://link.springer.de/link/service/journals/00778/papers/8007002/80070096.pdf", abstract =
"Structured data stored in files can benefit from standard database technology. In particular, we show here how such data can be queried and updated using declarative database languages. We introduce the notion of {\em structuring schema}, which consists of a grammar annotated with database programs. Based on a structuring schema, a file can be viewed as a database structure, queried and updated as such. For {\em queries}, we show that almost standard database optimization techniques can be used to answer queries without having to construct the entire database. For {\em updates}, we study in depth the propagation to the file of an update specified on the database view of this file. The problem is not feasible in general and we present a number of negative results. The positive results consist of techniques that allow to propagate updates efficiently under some reasonable {\em locality\/} conditions on the structuring schemas.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database; file system; query; query and update optimization; textual data; update", } @Article{Ooi:1998:FIR, author = "Beng Chin Ooi and Kian-Lee Tan and Tat Seng Chua and Wynne Hsu", title = "Fast Image Retrieval Using Color-Spatial Information", journal = j-VLDB-J, volume = "7", number = "2", pages = "115--128", month = may, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:45 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chua:Tat=Seng.html; 
http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hsu:Wynne.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/Ooi:Beng_Chin.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tan:Kian=Lee.html; http://link.springer.de/link/service/journals/00778/bibs/8007002/80070115.htm; http://link.springer.de/link/service/journals/00778/papers/8007002/80070115.pdf", abstract = "In this paper, we present an image retrieval system that employs both the color and spatial information of images to facilitate the retrieval process. The basic unit used in our technique is a {\em single-colored cluster}, which bounds a homogeneous region of that color in an image. Two clusters from two images are similar if they are of the same color and overlap in the image space. The number of clusters that can be extracted from an image can be very large, and it affects the accuracy of retrieval. We study the effect of the number of clusters on retrieval effectiveness to determine an appropriate value for ``optimal'' performance. To facilitate efficient retrieval, we also propose a multi-tier indexing mechanism called the {\em Sequenced Multi-Attribute Tree\/} (SMAT). We implemented a two-tier SMAT, where the first layer is used to prune away clusters that are of different colors, while the second layer discriminates clusters of different spatial locality. We conducted an experimental study on an image database consisting of 12,000 images. 
Our results show the effectiveness of the proposed color-spatial approach, and the efficiency of the proposed indexing mechanism.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "color-spatial information; content-based retrieval; sequenced multi-attribute tree; single-colored cluster", } @Article{Jarke:1998:GE, author = "Matthias Jarke", title = "Guest {Editorial}", journal = j-VLDB-J, volume = "7", number = "3", pages = "129--129", month = aug, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:47 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t8007003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/8007003/80070129.htm; http://link.springer.de/link/service/journals/00778/papers/8007003/80070129.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Seshadri:1998:EAD, author = "Praveen Seshadri", title = "Enhanced Abstract Data Types in Object-Relational Databases", journal = j-VLDB-J, volume = "7", number = "3", pages = "130--140", month = aug, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:47 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Special Issue on {VLDB 1997}. 
Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Seshadri:Praveen.html; http://link.springer.de/link/service/journals/00778/bibs/8007003/80070130.htm; http://link.springer.de/link/service/journals/00778/papers/8007003/80070130.pdf", abstract = "The explosion in complex multimedia content makes it crucial for database systems to support such data efficiently. This paper argues that the ``blackbox'' ADTs used in current object-relational systems inhibit their performance, thereby limiting their use in emerging applications. Instead, the next generation of object-relational database systems should be based on enhanced abstract data type (E-ADT) technology. An (E-ADT) can expose the {\em semantics\/} of its methods to the database system, thereby permitting advanced query optimizations. Fundamental architectural changes are required to build a database system with E-ADTs; the added functionality should not compromise the modularity of data types and the extensibility of the type system. The implementation issues have been explored through the development of E-ADTs in {\em Predator}. 
Initial performance results demonstrate an order of magnitude in performance improvements.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database types; extensibility; object-relational database; query optimization", } @Article{Kraiss:1998:IDC, author = "Achim Kraiss and Gerhard Weikum", title = "Integrated Document Caching and Prefetching in Storage Hierarchies Based on {Markov}-Chain Predictions", journal = j-VLDB-J, volume = "7", number = "3", pages = "141--162", month = aug, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:47 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kraiss:Achim.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Weikum:Gerhard.html; http://link.springer.de/link/service/journals/00778/bibs/8007003/80070141.htm; http://link.springer.de/link/service/journals/00778/papers/8007003/80070141.pdf", abstract = "Large multimedia document archives may hold a major fraction of their data in tertiary storage libraries for cost reasons. This paper develops an integrated approach to the vertical data migration between the tertiary, secondary, and primary storage in that it reconciles speculative prefetching, to mask the high latency of the tertiary storage, with the replacement policy of the document caches at the secondary and primary storage level, and also considers the interaction of these policies with the tertiary and secondary storage request scheduling. 
The integrated migration policy is based on a continuous-time Markov chain model for predicting the expected number of accesses to a document within a specified time horizon. Prefetching is initiated only if that expectation is higher than those of the documents that need to be dropped from secondary storage to free up the necessary space. In addition, the possible resource contention at the tertiary and secondary storage is taken into account by dynamically assessing the response-time benefit of prefetching a document versus the penalty that it would incur on the response time of the pending document requests. The parameters of the continuous-time Markov chain model, the probabilities of co-accessing certain documents and the interaction times between successive accesses, are dynamically estimated and adjusted to evolving workload patterns by keeping online statistics. The integrated policy for vertical data migration has been implemented in a prototype system. The system makes profitable use of the Markov chain model also for the scheduling of volume exchanges in the tertiary storage library. Detailed simulation experiments with Web-server-like synthetic workloads indicate significant gains in terms of client response time. 
The experiments also show that the overhead of the statistical bookkeeping and the computations for the access predictions is affordable.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "caching; Markov chains; performance; prefetching; scheduling; stochastic modeling; tertiary storage", } @Article{Chakrabarti:1998:SFS, author = "Soumen Chakrabarti and Byron Dom and Rakesh Agrawal and Prabhakar Raghavan", title = "Scalable Feature Selection, Classification and Signature Generation for Organizing Large Text Databases into Hierarchical Topic Taxonomies", journal = j-VLDB-J, volume = "7", number = "3", pages = "163--178", month = aug, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:47 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Agrawal:Rakesh.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chakrabarti:Soumen.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Dom:Byron.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Raghavan:Prabhakar.html; http://link.springer.de/link/service/journals/00778/bibs/8007003/80070163.htm; http://link.springer.de/link/service/journals/00778/papers/8007003/80070163.pdf", abstract = "We explore how to organize large text databases hierarchically by topic to aid better searching, browsing and filtering. Many corpora, such as Internet directories, digital libraries, and patent databases are manually organized into topic hierarchies, also called {\em taxonomies}. 
Similar to indices for relational data, taxonomies make search and access more efficient. However, the exponential growth in the volume of on-line textual information makes it nearly impossible to maintain such taxonomic organization for large, fast-changing corpora by hand. We describe an automatic system that starts with a small sample of the corpus in which topics have been assigned by hand, and then updates the database with new documents as the corpus grows, assigning topics to these new documents with high speed and accuracy. To do this, we use techniques from statistical pattern recognition to efficiently separate the {\em feature\/} words, or {\em discriminants}, from the {\em noise\/} words at each node of the taxonomy. Using these, we build a multilevel classifier. At each node, this classifier can ignore the large number of ``noise'' words in a document. Thus, the classifier has a small model size and is very fast. Owing to the use of context-sensitive features, the classifier is very accurate. As a by-product, we can compute for each document a set of terms that occur significantly more often in it than in the classes to which it belongs. We describe the design and implementation of our system, stressing how to exploit standard, efficient relational operations like sorts and joins. We report on experiences with the Reuters newswire benchmark, the US patent database, and web document samples from Yahoo!. We discuss applications where our system can improve searching and filtering capabilities.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Roy:1998:GCO, author = "Prasan Roy and S. Seshadri and Abraham Silberschatz and S. Sudarshan and S. 
Ashwin", title = "Garbage Collection in Object-Oriented Databases Using Transactional Cyclic Reference Counting", journal = j-VLDB-J, volume = "7", number = "3", pages = "179--193", month = aug, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:47 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Ashwin:S=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Roy:Prasan.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Seshadri:S=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Silberschatz:Abraham.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sudarshan:S=.html; http://link.springer.de/link/service/journals/00778/bibs/8007003/80070179.htm; http://link.springer.de/link/service/journals/00778/papers/8007003/80070179.pdf", abstract = "Garbage collection is important in object-oriented databases to free the programmer from explicitly deallocating memory. In this paper, we present a garbage collection algorithm, called Transactional Cyclic Reference Counting (TCRC), for object-oriented databases. The algorithm is based on a variant of a reference-counting algorithm proposed for functional programming languages. The algorithm keeps track of auxiliary reference count information to detect and collect cyclic garbage. The algorithm works correctly in the presence of concurrently running transactions, and system failures. It does not obtain any long-term locks, thereby minimizing interference with transaction processing. It uses recovery subsystem logs to detect pointer updates; thus, existing code need not be rewritten.
Finally, it exploits schema information, if available, to reduce costs. We have implemented the TCRC algorithm and present results of a performance study of the implementation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ng:1998:IRM, author = "Wee Teck Ng and Peter M. Chen", title = "Integrating Reliable Memory in Databases", journal = j-VLDB-J, volume = "7", number = "3", pages = "194--204", month = aug, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:47 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chen:Peter_M=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Ng:Wee_Teck.html; http://link.springer.de/link/service/journals/00778/bibs/8007003/80070194.htm; http://link.springer.de/link/service/journals/00778/papers/8007003/80070194.pdf", abstract = "Recent results in the Rio project at the University of Michigan show that it is possible to create an area of main memory that is as safe as disk from operating system crashes. This paper explores how to integrate the reliable memory provided by the Rio file cache into a database system. Prior studies have analyzed the performance benefits of reliable memory; we focus instead on how different designs affect reliability. We propose three designs for integrating reliable memory into databases: non-persistent database buffer cache, persistent database buffer cache, and persistent database buffer cache with protection. 
Non-persistent buffer caches use an I/O interface to reliable memory and require the fewest modifications to existing databases. However, they waste memory capacity and bandwidth due to double buffering. Persistent buffer caches use a memory interface to reliable memory by mapping it into the database address space. This places reliable memory under complete database control and eliminates double buffering, but it may expose the buffer cache to database errors. Our third design reduces this exposure by write protecting the buffer pages. Extensive fault tests show that mapping reliable memory into the database address space does not significantly hurt reliability. This is because wild stores rarely touch dirty, committed pages written by previous transactions. As a result, we believe that databases should use a memory interface to reliable memory.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "main memory database system (MMDB); recovery; reliability", } @Article{Ozsu:1998:I, author = "M. Tamer {\"O}zsu and Stavros Christodoulakis", title = "Introduction", journal = j-VLDB-J, volume = "7", number = "4", pages = "205--205", month = dec, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:48 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ozsu:1998:SIM, author = "M. 
Tamer {\"O}zsu and Stavros Christodoulakis", title = "Special Issue on Multimedia Databases: Introduction", journal = j-VLDB-J, volume = "7", number = "4", pages = "205--205", month = dec, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 27 10:11:57 MDT 2000", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/=/=Ouml=zsu:M=_Tamer.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Christodoulakis:Stavros.html; http://link.springer.de/link/service/journals/00778/bibs/8007004/80070205.htm; http://link.springer.de/link/service/journals/00778/papers/8007004/80070205.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Garofalakis:1998:PRS, author = "Minos N. 
Garofalakis and Banu {\"O}zden and Avi Silberschatz", title = "On Periodic Resource Scheduling for Continuous-Media Databases", journal = j-VLDB-J, volume = "7", number = "4", pages = "206--225", month = dec, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 27 10:11:57 MDT 2000", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/=/=Ouml=zden:Banu.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Garofalakis:Minos_N=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Silberschatz:Abraham.html; http://link.springer.de/link/service/journals/00778/bibs/8007004/80070206.htm; http://link.springer.de/link/service/journals/00778/papers/8007004/80070206.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", xxauthor = "Minos N. Garofalakis and Banu {\"O}zden and Abraham Silberschatz", } @Article{Jiang:1998:STC, author = "Haitao Jiang and Ahmed K.
Elmagarmid", title = "Spatial and Temporal Content-Based Access to Hypervideo Databases", journal = j-VLDB-J, volume = "7", number = "4", pages = "226--238", month = dec, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:48 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/e/Elmagarmid:Ahmed_K=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jiang:Haitao.html; http://link.springer.de/link/service/journals/00778/bibs/8007004/80070226.htm; http://link.springer.de/link/service/journals/00778/papers/8007004/80070226.pdf", abstract = "Providing content-based video query, retrieval and browsing is the most important goal of a video database management system (VDBMS). Video data is unique not only in terms of its spatial and temporal characteristics, but also in the semantic associations manifested by the entities present in the video. This paper introduces a novel video data model called {\em Logical Hypervideo Data Model}. In addition to multilevel video abstractions, the model is capable of representing video entities that users are interested in (defined as {\em hot objects\/}) and their semantic associations with other logical video abstractions, including hot objects themselves. The semantic associations are modeled as {\em video hyperlinks\/} and video data with such property are called {\em hypervideo}. Video hyperlinks provide a flexible and effective way of browsing video data. Based on the proposed model, video queries can be specified with both temporal and spatial constraints, as well as with semantic descriptions of the video data. 
The characteristics of hot objects' spatial and temporal relations and efficient evaluation of them are also discussed. Some query examples are given to demonstrate the expressiveness of the video data model and query language. Finally, we describe a modular video database system architecture that our web-based prototype is based on.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "content-based query; hot object; hypervideo; spatial and temporal constraint; video database", } @Article{Ng:1998:OCO, author = "Raymond T. Ng and Paul Shum", title = "Optimal Clip Ordering for Multi-Clip Queries", journal = j-VLDB-J, volume = "7", number = "4", pages = "239--252", month = dec, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:48 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Ng:Raymond_T=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shum:Paul.html; http://link.springer.de/link/service/journals/00778/bibs/8007004/80070239.htm; http://link.springer.de/link/service/journals/00778/papers/8007004/80070239.pdf", abstract = "A multi-clip query requests multiple video clips be returned as the answer of the query. In many applications and situations, the order in which these clips are to be delivered does not matter that much to the user. This allows the system ample opportunities to optimize system throughput by using schedules that maximize the effect of piggybacking. In this paper, we study how to find such optimal schedules. 
In particular, we consider two optimization criteria: (i) one based on maximizing the number of piggybacked clips, and (ii) the other based on maximizing the impact on buffer space. We show that the optimal schedule under the first criterion is equivalent to a maximum matching in a suitably defined bipartite graph, and that under the second criterion, the optimal schedule is equivalent to a maximum matching in a suitably defined weighted bipartite graph. Our experimental results, which are based on realistic distributions, indicate that both kinds of optimal schedules can lead to a gain in throughput of over 300\%. And yet the time taken to compute such an optimal schedule is negligible. Finally, we show how to deal with clips that are variable in length.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "admission control; bipartite graph matching; performance of multimedia systems", } @Article{Soffer:1998:ISI, author = "Aya Soffer and Hanan Samet", title = "Integrating Symbolic Images into a Multimedia Database System Using Classification and Abstraction Approaches", journal = j-VLDB-J, volume = "7", number = "4", pages = "253--274", month = dec, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:48 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Samet:Hanan.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Soffer:Aya.html; http://link.springer.de/link/service/journals/00778/bibs/8007004/80070253.htm; 
http://link.springer.de/link/service/journals/00778/papers/8007004/80070253.pdf", abstract = "Symbolic images are composed of a finite set of symbols that have a semantic meaning. Examples of symbolic images include maps (where the semantic meaning of the symbols is given in the legend), engineering drawings, and floor plans. Two approaches for supporting queries on symbolic-image databases that are based on image content are studied. The classification approach preprocesses all symbolic images and attaches a semantic classification and an associated certainty factor to each object that it finds in the image. The abstraction approach describes each object in the symbolic image by using a vector consisting of the values of some of its features (e.g., shape, genus, etc.). The approaches differ in the way in which responses to queries are computed. In the classification approach, images are retrieved on the basis of whether or not they contain objects that have the same classification as the objects in the query. On the other hand, in the abstraction approach, retrieval is on the basis of similarity of feature vector values of these objects. Methods of integrating these two approaches into a relational multimedia database management system so that symbolic images can be stored and retrieved based on their content are described. Schema definitions and indices that support query specifications involving spatial as well as contextual constraints are presented. Spatial constraints may be based on both locational information (e.g., distance) and relational information (e.g., north of). Different strategies for image retrieval for a number of typical queries using these approaches are described. Estimated costs are derived for these strategies. 
Results are reported of a comparative study of the two approaches in terms of image insertion time, storage space, retrieval accuracy, and retrieval time.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "image indexing; multimedia databases; query optimization; retrieval by content; spatial databases; symbolic-image databases", } @Article{Zezula:1998:ASR, author = "Pavel Zezula and Pasquale Savino and Giuseppe Amato and Fausto Rabitti", title = "Approximate Similarity Retrieval with {M}-Trees", journal = j-VLDB-J, volume = "7", number = "4", pages = "275--293", month = dec, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:48 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Amato:Giuseppe.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Rabitti:Fausto.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Savino:Pasquale.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zezula:Pavel.html; http://link.springer.de/link/service/journals/00778/bibs/8007004/80070275.htm; http://link.springer.de/link/service/journals/00778/papers/8007004/80070275.pdf", abstract = "Motivated by the urgent need to improve the efficiency of similarity queries, approximate similarity retrieval is investigated in the environment of a metric tree index called the M-tree. Three different approximation techniques are proposed, which show how to forsake query precision for improved performance. 
Measures are defined that can quantify the improvements in performance efficiency and the quality of approximations. The proposed approximation techniques are then tested on various synthetic and real-life files. The evidence obtained from the experiments confirms our hypothesis that a high-quality approximated similarity search can be performed at a much lower cost than that needed to obtain the exact results. The proposed approximation techniques are scalable and appear to be independent of the metric used. Extensions of these techniques to the environments of other similarity search indexes are also discussed.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access structures; approximation algorithms; distance only data; performance evaluation; similarity search", } @Article{Balkir:1998:DPM, author = "Nevzat Hurkan Balkir and Gultekin {\"O}zsoyoglu", title = "Delivering Presentations from Multimedia Servers", journal = j-VLDB-J, volume = "7", number = "4", pages = "294--307", month = dec, year = "1998", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:48 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html; http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/=/=Ouml=zsoyoglu:Gultekin.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Balkir:Nevzat_Hurkan.html; http://link.springer.de/link/service/journals/00778/bibs/8007004/80070294.htm; http://link.springer.de/link/service/journals/00778/papers/8007004/80070294.pdf", abstract = "Most multimedia servers reported in the literature are designed to serve 
multiple and independent video/audio streams. We think that, in future, multimedia servers will also serve complete presentations. Multimedia presentations provide unique opportunities to develop algorithms for buffer management and admission control, as execution-time consumption requirements of presentations are known a priori. In this paper, we examine presentations in three different domains (heavyweight, middleweight, and lightweight) and provide buffer management and admission control algorithms for the three domains. We propose two improvements (flattening and dynamic-adjustments) on the schedules created for the heavyweight presentations. Results from a simulation environment are presented.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "admission control; buffer management; flattening; multimedia presentations", } @Article{Li:1999:FJU, author = "Zhe Li and Kenneth A. 
Ross", title = "Fast Joins Using Join Indices", journal = j-VLDB-J, volume = "8", number = "1", pages = "1--24", month = apr, year = "1999", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:49 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t9008001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Li:Zhe.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ross:Kenneth_A=.html; http://link.springer.de/link/service/journals/00778/bibs/9008001/90080001.htm; http://link.springer.de/link/service/journals/00778/papers/9008001/90080001.pdf", abstract = "Two new algorithms, ``Jive join'' and ``Slam join,'' are proposed for computing the join of two relations using a join index. The algorithms are duals: Jive join range-partitions input relation tuple ids and then processes each partition, while Slam join forms ordered runs of input relation tuple ids and then merges the results. Both algorithms make a single sequential pass through each input relation, in addition to one pass through the join index and two passes through a temporary file, whose size is half that of the join index. Both algorithms require only that the number of blocks in main memory is of the order of the square root of the number of blocks in the smaller relation. By storing intermediate and final join results in a vertically partitioned fashion, our algorithms need to manipulate less data in memory at a given time than other algorithms. The algorithms are resistant to data skew and adaptive to memory fluctuations. Selection conditions can be incorporated into the algorithms. Using a detailed cost model, the algorithms are analyzed and compared with competing algorithms. 
For large input relations, our algorithms perform significantly better than Valduriez's algorithm, the TID join algorithm, and hash join algorithms. An experimental study is also conducted to validate the analytical results and to demonstrate the performance characteristics of each algorithm in practice.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "decision support systems; query processing", remark = "Check month: April or May??", } @Article{Harder:1999:IPS, author = "Theo H{\"a}rder and G{\"u}nter Sauter and Joachim Thomas", title = "The Intrinsic Problems of Structural Heterogeneity and an Approach to Their Solution", journal = j-VLDB-J, volume = "8", number = "1", pages = "25--43", month = apr, year = "1999", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:49 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t9008001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/H=auml=rder:Theo.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sauter:G=uuml=nter.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Thomas:Joachim.html; http://link.springer.de/link/service/journals/00778/bibs/9008001/90080025.htm; http://link.springer.de/link/service/journals/00778/papers/9008001/90080025.pdf", abstract = "This paper focuses on the problems that arise when integrating data from heterogeneous sources in a single, unified database view. At first, we give a detailed analysis of the kinds of structural heterogeneity that occur when unified views are derived from different database systems. 
We present the results in a multiple tier architecture which distinguishes different levels of heterogeneity and relates them to their underlying causes as well as to the mapping conflicts resulting from the view derivation process. As the second essential contribution, the paper presents our approach to a mapping language solving the identified conflicts. The main characteristics of the language are its descriptiveness, its capability to map between schemas written in the relational, object-oriented, ER, or EXPRESS data model, and its facilities for specifying user-defined update operations on the view that are to be propagated to the data sources. Finally, we briefly discuss how this mapping information is employed to convert queries formulated with respect to the integrated view, into database operations over the heterogeneous data sources.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "heterogeneity; legacy systems; mapping language; schema integration; schema mapping; updatable views", remark = "Check month: April or May??", } @Article{Huang:1999:CTP, author = "Yueh-Min Huang and Jen-Wen Ding and Shiao-Li Tsao", title = "Constant Time Permutation: An Efficient Block Allocation Strategy for Variable-Bit-Rate Continuous Media Data", journal = j-VLDB-J, volume = "8", number = "1", pages = "44--54", month = apr, year = "1999", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:49 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t9008001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Ding:Jen=Wen.html; 
http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Huang:Yueh=Min.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tsao:Shiao=Li.html; http://link.springer.de/link/service/journals/00778/bibs/9008001/90080044.htm; http://link.springer.de/link/service/journals/00778/papers/9008001/90080044.pdf", abstract = "To provide high accessibility of continuous-media (CM) data, CM servers generally stripe data across multiple disks. Currently, the most widely used striping scheme for CM data is round-robin permutation (RRP). Unfortunately, when RRP is applied to variable-bit-rate (VBR) CM data, load imbalance across multiple disks occurs, thereby reducing overall system performance. In this paper, the performance of a VBR CM server with RRP is analyzed. In addition, we propose an efficient striping scheme called constant time permutation (CTP), which takes the VBR characteristic into account and obtains a more balanced load than RRP. Analytic models of both RRP and CTP are presented, and the models are verified via trace-driven simulations. Analysis and simulation results show that CTP can substantially increase the number of clients supported, though it might introduce a few seconds/minutes of initial delay.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "continuous-media server; data placement; load balancing; striping; video-on-demand (VOD)", remark = "Check month: April or May??", } @Article{Kabra:1999:OOO, author = "Navin Kabra and David J. 
DeWitt", title = "{OPT++}: an object-oriented implementation for extensible database query optimization", journal = j-VLDB-J, volume = "8", number = "1", pages = "55--78", month = apr, year = "1999", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:49 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t9008001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/DeWitt:David_J=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kabra:Navin.html; http://link.springer.de/link/service/journals/00778/bibs/9008001/90080055.htm; http://link.springer.de/link/service/journals/00778/papers/9008001/90080055.pdf", abstract = "In this paper we describe the design and implementation of OPT++, a tool for extensible database query optimization that uses an object-oriented design to simplify the task of implementing, extending, and modifying an optimizer. Building an optimizer using OPT++ makes it easy to extend the query algebra (to add new query algebra operators and physical implementation algorithms to the system), easy to change the search space, and also to change the search strategy. Furthermore, OPT++ comes equipped with a number of search strategies that are available for use by an optimizer-implementor. OPT++ considerably simplifies both, the task of implementing an optimizer for a new database system, and the task of evaluating alternative optimization techniques and strategies to decide what techniques are best suited for that database system. We present the results of a series of performance studies. 
These results validate our design and show that, in spite of its flexibility, OPT++ can be used to build efficient optimizers.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "extensibility; object-relational databases; query optimization; software architecture", remark = "Check month: April or May??", } @Article{Krivokapic:1999:DDD, author = "Natalija Krivokapi{\'c} and Alfons Kemper and Ehud Gudes", title = "Deadlock Detection in Distributed Database Systems: a New Algorithm and a Comparative Performance Analysis", journal = j-VLDB-J, volume = "8", number = "2", pages = "79--100", month = apr, year = "1999", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:50 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t9008002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Gudes:Ehud.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kemper:Alfons.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Krivokapic:Natalija.html; http://link.springer.de/link/service/journals/00778/bibs/9008002/90080079.htm; http://link.springer.de/link/service/journals/00778/papers/9008002/90080079.pdf", abstract = "This paper attempts a comprehensive study of deadlock detection in distributed database systems. First, the two predominant deadlock models in these systems and the four different distributed deadlock detection approaches are discussed. Afterwards, a new deadlock detection algorithm is presented. 
The algorithm is based on dynamically creating {\em deadlock detection agents\/} (DDAs), each being responsible for detecting deadlocks in one connected component of the global wait-for-graph (WFG). The DDA scheme is a ``self-tuning'' system: after an initial warm-up phase, dedicated DDAs will be formed for ``centers of locality'', i.e., parts of the system where many conflicts occur. A dynamic shift in locality of the distributed system will be responded to by automatically creating new DDAs while the obsolete ones terminate. In this paper, we also compare the most competitive representative of each class of algorithms suitable for distributed database systems based on a simulation model, and point out their relative strengths and weaknesses. The extensive experiments we carried out indicate that our newly proposed deadlock detection algorithm outperforms the other algorithms in the vast majority of configurations and workloads and, in contrast to all other algorithms, is very robust with respect to differing load and access profiles.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "comparative performance analysis; deadlock detection; distributed database systems; simulation study", remark = "Check month: April or May??", } @Article{Boncz:1999:MPQ, author = "Peter A. Boncz and Martin L. 
Kersten", title = "{MIL} primitives for querying a fragmented world", journal = j-VLDB-J, volume = "8", number = "2", pages = "101--119", month = apr, year = "1999", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:50 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t9008002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Boncz:Peter_A=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kersten:Martin_L=.html; http://link.springer.de/link/service/journals/00778/bibs/9008002/90080101.htm; http://link.springer.de/link/service/journals/00778/papers/9008002/90080101.pdf", abstract = "In query-intensive database application areas, like decision support and data mining, systems that use vertical fragmentation have a significant performance advantage. In order to support relational or object oriented applications on top of such a fragmented data model, a flexible yet powerful intermediate language is needed. This problem has been successfully tackled in Monet, a modern extensible database kernel developed by our group. We focus on the design choices made in the Monet interpreter language (MIL), its algebraic query language, and outline how its concept of tactical optimization enhances and simplifies the optimization of complex queries. 
Finally, we summarize the experience gained in Monet by creating a highly efficient implementation of MIL.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database systems; main-memory techniques; query languages; query optimization; vertical fragmentation", remark = "Check month: April or May??", } @Article{Aslan:1999:SHR, author = "Goksel Aslan and Dennis McLeod", title = "Semantic Heterogeneity Resolution in Federated Databases by Metadata Implantation and Stepwise Evolution", journal = j-VLDB-J, volume = "8", number = "2", pages = "120--132", month = apr, year = "1999", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:50 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t9008002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Aslan:Goksel.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/McLeod:Dennis.html; http://link.springer.de/link/service/journals/00778/bibs/9008002/90080120.htm; http://link.springer.de/link/service/journals/00778/papers/9008002/90080120.pdf", abstract = "A key aspect of interoperation among data-intensive systems involves the mediation of metadata and ontologies across database boundaries. One way to achieve such mediation between a local database and a remote database is to fold remote metadata into the local metadata, thereby creating a common platform through which information sharing and exchange becomes possible. 
Schema implantation and semantic evolution, our approach to the metadata folding problem, is a partial database integration scheme in which remote and local (meta)data are integrated in a stepwise manner over time. We introduce metadata implantation and stepwise evolution techniques to interrelate database elements in different databases, and to resolve conflicts on the structure and semantics of database elements (classes, attributes, and individual instances). We employ a semantically rich canonical data model, and an incremental integration and semantic heterogeneity resolution scheme. In our approach, relationships between local and remote information units are determined whenever enough knowledge about their semantics is acquired. The metadata folding problem is solved by implanting remote database elements into the local database, a process that imports remote database elements into the local database environment, hypothesizes the relevance of local and remote classes, and customizes the organization of remote metadata. We have implemented a prototype system and demonstrated its use in an experimental neuroscience environment.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database integration; database interoperability; federated databases; schema evolution; semantic heterogeneity resolution", remark = "Check month: April or May??", } @Article{Law:1999:ESI, author = "Kelvin K. W. Law and John C. S. 
Lui and Leana Golubchik", title = "Efficient Support for Interactive Service in Multi-Resolution {VOD} Systems", journal = j-VLDB-J, volume = "8", number = "2", pages = "133--153", month = apr, year = "1999", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:50 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t9008002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Golubchik:Leana.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Law:Kelvin_K=_W=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lui:John_C=_S=.html; http://link.springer.de/link/service/journals/00778/bibs/9008002/90080133.htm; http://link.springer.de/link/service/journals/00778/papers/9008002/90080133.pdf", abstract = "Advances in high-speed networks and multimedia technologies have made it feasible to provide video-on-demand (VOD) services to users. However, it is still a challenging task to design a cost-effective VOD system that can support a large number of clients (who may have different quality of service (QoS) requirements) and, at the same time, provide different types of VCR functionalities. Although it has been recognized that VCR operations are important functionalities in providing VOD service, techniques proposed in the past for providing VCR operations may require additional system resources, such as extra disk I/O, additional buffer space, as well as network bandwidth. 
In this paper, we consider the design of a VOD storage server that has the following features: (1) provision of different levels of display resolutions to users who have different QoS requirements, (2) provision of different types of VCR functionalities, such as fast forward and rewind, without imposing additional demand on the system buffer space, I/O bandwidth, and network bandwidth, and (3) guarantees of the load-balancing property across all disks during normal and VCR display periods. The above-mentioned features are especially important because they simplify the design of the buffer space, I/O, and network resource allocation policies of the VOD storage system. The load-balancing property also ensures that no single disk will be the bottleneck of the system. In this paper, we propose data block placement, admission control, and I/O-scheduling algorithms, as well as determine the corresponding buffer space requirements of the proposed VOD storage system. We show that the proposed VOD system can provide VCR and multi-resolution services to the viewing clients and at the same time maintain the load-balancing property.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "interactive services; multi-resolution services; multimedia servers; VOD systems", remark = "Check month: April or May??", } @Article{Shmueli:2000:FVP, author = "O. Shmueli and J. 
Widom", title = "Foreword by the {VLDB} '98 {PC Chairmen}", journal = j-VLDB-J, volume = "8", number = "3--4", pages = "155--155", month = feb, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:51 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Widom:2000:BPV, author = "Jennifer Widom and Oded Shmueli", title = "Best Papers of {VLDB '98, New York: Foreword by the VLDB '98 PC Chairmen: Best Papers of VLDB '98}", journal = j-VLDB-J, volume = "8", number = "3--4", pages = "155--155", month = feb, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 27 10:11:55 MDT 2000", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shmueli:Oded.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Widom:Jennifer.html; http://link.springer.de/link/service/journals/00778/bibs/0008003/00080155.htm; http://link.springer.de/link/service/journals/00778/papers/0008003/00080155.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", xxauthor = "O. Shmueli and J.
Widom", } @Article{Braumandl:2000:FJP, author = "Reinhard Braumandl and Jens Clau{\ss}en and Alfons Kemper and Donald Kossmann", title = "Functional-Join Processing", journal = j-VLDB-J, volume = "8", number = "3--4", pages = "156--177", month = feb, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:51 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Braumandl:Reinhard.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Clau=szlig=en:Jens.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kemper:Alfons.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kossmann:Donald.html; http://link.springer.de/link/service/journals/00778/bibs/0008003/00080156.htm; http://link.springer.de/link/service/journals/00778/papers/0008003/00080156.pdf", abstract = "Inter-object references are one of the key concepts of object-relational and object-oriented database systems. In this work, we investigate alternative techniques to implement inter-object references and make the best use of them in query processing, i.e., in evaluating functional joins. We will give a comprehensive overview and performance evaluation of all known techniques for simple (single-valued) as well as multi-valued functional joins. Furthermore, we will describe special {\em order-preserving\/} functional-join techniques that are particularly attractive for decision support queries that require ordered results.
While most of the presentation of this paper is focused on object-relational and object-oriented database systems, some of the results can also be applied to plain relational databases because {\em index nested-loop joins\/} along key/foreign-key relationships, as they are frequently found in relational databases, are just one particular way to execute a functional join.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "functional join; logical OID; object identifier; order-preserving join; physical OID; pointer join; query processing", } @Article{George:2000:SBF, author = "Binto George and Jayant R. Haritsa", title = "Secure Buffering in Firm Real-Time Database Systems", journal = j-VLDB-J, volume = "8", number = "3--4", pages = "178--198", month = feb, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:51 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/George:Binto.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Haritsa:Jayant_R=.html; http://link.springer.de/link/service/journals/00778/bibs/0008003/00080178.htm; http://link.springer.de/link/service/journals/00778/papers/0008003/00080178.pdf", abstract = "Many real-time database applications arise in electronic financial services, safety-critical installations and military systems where enforcing security is crucial to the success of the enterprise.
We investigate here the performance implications, in terms of killed transactions, of guaranteeing {\em multi-level secrecy\/} in a real-time database system supporting applications with {\em firm\/} deadlines. In particular, we focus on the {\em buffer management\/} aspects of this issue. Our main contributions are the following. First, we identify the importance and difficulties of providing secure buffer management in the real-time database environment. Second, we present SABRE, a novel buffer management algorithm that provides {\em covert-channel-free\/} security. SABRE employs a fully dynamic one-copy allocation policy for efficient usage of buffer resources. It also incorporates several optimizations for reducing the overall number of killed transactions and for decreasing the unfairness in the distribution of killed transactions across security levels. Third, using a detailed simulation model, the real-time performance of SABRE is evaluated against unsecure conventional and real-time buffer management policies for a variety of security-classified transaction workloads and system configurations. Our experiments show that SABRE provides security with only a modest drop in real-time performance. Finally, we evaluate SABRE's performance when augmented with the GUARD adaptive admission control policy. Our experiments show that this combination provides close to ideal fairness for real-time applications that can tolerate covert-channel bandwidths of up to one bit per second (a limit specified in military standards).", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "buffer management; covert channels; firm deadlines; real-time database", } @Article{Muth:2000:LLS, author = "Peter Muth and Patrick E.
O'Neil and Achim Pick and Gerhard Weikum", title = "The {LHAM} Log-Structured History Data Access Method", journal = j-VLDB-J, volume = "8", number = "3--4", pages = "199--221", month = feb, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:51 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Muth:Peter.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/O=Neil:Patrick_E=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Pick:Achim.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Weikum:Gerhard.html; http://link.springer.de/link/service/journals/00778/bibs/0008003/00080199.htm; http://link.springer.de/link/service/journals/00778/papers/0008003/00080199.pdf", abstract = "Numerous applications such as stock market or medical information systems require that both historical and current data be logically integrated into a temporal database. The underlying access method must support different forms of ``time-travel'' queries, the migration of old record versions onto inexpensive archive media, and high insertion and update rates. This paper presents an access method for transaction-time temporal data, called the log-structured history data access method (LHAM) that meets these demands. The basic principle of LHAM is to partition the data into successive components based on the timestamps of the record versions. Components are assigned to different levels of a storage hierarchy, and incoming data is continuously migrated through the hierarchy. 
The paper discusses the LHAM concepts, including concurrency control and recovery, our full-fledged LHAM implementation, and experimental performance results based on this implementation. A detailed comparison with the TSB-tree, both analytically and based on experiments with real implementations, shows that LHAM is highly superior in terms of insert performance, while query performance is in almost all cases at least as good as for the TSB-tree; in many cases it is much better.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data warehouses; index structures; performance; storage systems; temporal databases", } @Article{Gibson:2000:CCD, author = "David Gibson and Jon M. Kleinberg and Prabhakar Raghavan", title = "Clustering Categorical Data: An Approach Based on Dynamical Systems", journal = j-VLDB-J, volume = "8", number = "3--4", pages = "222--236", month = feb, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:51 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Gibson:David.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kleinberg:Jon_M=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Raghavan:Prabhakar.html; http://link.springer.de/link/service/journals/00778/bibs/0008003/00080222.htm; http://link.springer.de/link/service/journals/00778/papers/0008003/00080222.pdf", abstract = "We describe a novel approach for clustering collections of sets, and its application to the analysis and mining of categorical 
data. By ``categorical data,'' we mean tables with fields that cannot be naturally ordered by a metric --- e.g., the names of producers of automobiles, or the names of products offered by a manufacturer. Our approach is based on an iterative method for assigning and propagating weights on the categorical values in a table; this facilitates a type of similarity measure arising from the co-occurrence of values in the dataset. Our techniques can be studied analytically in terms of certain types of non-linear dynamical systems.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "categorical data; clustering; data mining; dynamical systems; hypergraphs", } @Article{Knorr:2000:DBO, author = "Edwin M. Knorr and Raymond T. Ng and Vladimir Tucakov", title = "Distance-Based Outliers: Algorithms and Applications", journal = j-VLDB-J, volume = "8", number = "3--4", pages = "237--253", month = feb, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:51 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Knorr:Edwin_M=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Ng:Raymond_T=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tucakov:V=.html; http://link.springer.de/link/service/journals/00778/bibs/0008003/00080237.htm; http://link.springer.de/link/service/journals/00778/papers/0008003/00080237.pdf", abstract = "This paper deals with finding outliers (exceptions) in large, multidimensional datasets. 
The identification of outliers can lead to the discovery of truly unexpected knowledge in areas such as electronic commerce, credit card fraud, and even the analysis of performance statistics of professional athletes. Existing methods that we have seen for finding outliers can only deal efficiently with two dimensions/attributes of a dataset. In this paper, we study the notion of {\em DB\/} ({\em distance-based\/}) outliers. Specifically, we show that (i) outlier detection can be done {\em efficiently\/} for {\em large\/} datasets, and for $k$-dimensional datasets with large values of $k$ (e.g., $ k \ge 5$); and (ii), outlier detection is a {\em meaningful\/} and important knowledge discovery task. First, we present two simple algorithms, both having a complexity of $ O(k \: N^2)$, $k$ being the dimensionality and $N$ being the number of objects in the dataset. These algorithms readily support datasets with many more than two attributes. Second, we present an optimized cell-based algorithm that has a complexity that is linear with respect to $N$, but exponential with respect to $k$. We provide experimental results indicating that this algorithm significantly outperforms the two simple algorithms for $ k \leq 4$. Third, for datasets that are mainly disk-resident, we present another version of the cell-based algorithm that guarantees at most three passes over a dataset. Again, experimental results show that this algorithm is by far the best for $ k \leq 4$. 
Finally, we discuss our work on three real-life applications, including one on spatio-temporal data (e.g., a video surveillance application), in order to confirm the relevance and broad applicability of {\em DB\/} outliers.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "algorithms; data mining; data mining applications; outliers\slash exceptions", } @Article{Korn:2000:QDM, author = "Flip Korn and Alexandros Labrinidis and Yannis Kotidis and Christos Faloutsos", title = "Quantifiable Data Mining Using Ratio Rules", journal = j-VLDB-J, volume = "8", number = "3--4", pages = "254--266", month = feb, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:51 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Faloutsos:Christos.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Korn:Flip.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kotidis:Yannis.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Labrinidis:Alexandros.html; http://link.springer.de/link/service/journals/00778/bibs/0008003/00080254.htm; http://link.springer.de/link/service/journals/00778/papers/0008003/00080254.pdf", abstract = "Association Rule Mining algorithms operate on a data matrix (e.g., customers $ \times $ products) to derive association rules [AIS93b, SA96]. We propose a new paradigm, namely, {\em Ratio Rules}, which are quantifiable in that we can measure the ``goodness'' of a set of discovered rules. 
We also propose the ``guessing error'' as a measure of the ``goodness'', that is, the root-mean-square error of the reconstructed values of the cells of the given matrix, when we pretend that they are unknown. Another contribution is a novel method to guess missing/hidden values from the Ratio Rules that our method derives. For example, if somebody bought \$10 of milk and \$3 of bread, our rules can ``guess'' the amount spent on butter. Thus, unlike association rules, Ratio Rules can perform a variety of important tasks such as forecasting, answering ``what-if'' scenarios, detecting outliers, and visualizing the data. Moreover, we show that we can compute Ratio Rules in a {\em single\/} pass over the data set with small memory requirements (a few small matrices), in contrast to association rule mining methods which require multiple passes and/or large memory. Experiments on several real data sets (e.g., basketball and baseball statistics, biological data) demonstrate that the proposed method: (a) leads to rules that make sense; (b) can find large itemsets in binary matrices, even in the presence of noise; and (c) consistently achieves a ``guessing error'' of up to 5 times less than using straightforward column averages.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data mining; forecasting; guessing error; knowledge discovery", } @Article{Torp:2000:ETD, author = "Kristian Torp and Christian S.
Jensen and Richard Thomas Snodgrass", title = "Effective Timestamping in Databases", journal = j-VLDB-J, volume = "8", number = "3--4", pages = "267--288", month = feb, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:51 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jensen:Christian_S=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Snodgrass:Richard_T=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Torp:Kristian.html; http://link.springer.de/link/service/journals/00778/bibs/0008003/00080267.htm; http://link.springer.de/link/service/journals/00778/papers/0008003/00080267.pdf", abstract = "Many existing database applications place various timestamps on their data, rendering temporal values such as dates and times prevalent in database tables. During the past two decades, several dozen temporal data models have appeared, all with timestamps being integral components. The models have used timestamps for encoding two specific temporal aspects of database facts, namely transaction time, when the facts are current in the database, and valid time, when the facts are true in the modeled reality. However, with few exceptions, the assignment of timestamp values has been considered only in the context of individual modification statements. This paper takes the next logical step: It considers the use of timestamping for capturing transaction and valid time in the context of transactions. The paper initially identifies and analyzes several problems with straightforward timestamping, then proceeds to propose a variety of techniques aimed at solving these problems. 
Timestamping the results of a transaction with the commit time of the transaction is a promising approach. The paper studies how this timestamping may be done using a spectrum of techniques. While many database facts are valid until {\em now}, the current time, this value is absent from the existing temporal types. Techniques that address this problem using different substitute values are presented. Using a stratum architecture, the performance of the different proposed techniques are studied. Although querying and modifying time-varying data is accompanied by a number of subtle problems, we present a comprehensive approach that provides application programmers with simple, consistent, and efficient support for modifying bitemporal databases in the context of user transactions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "timestamping; transactions", } @Article{Sheikholeslami:2000:WWB, author = "Gholamhosein Sheikholeslami and Surojit Chatterjee and Aidong Zhang", title = "{WaveCluster}: a Wavelet Based Clustering Approach for Spatial Data in Very Large Databases", journal = j-VLDB-J, volume = "8", number = "3--4", pages = "289--304", month = feb, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:51 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chatterjee:Surojit.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sheikholeslami:Gholamhosein.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zhang:Aidong.html; 
http://link.springer.de/link/service/journals/00778/bibs/0008003/00080289.htm; http://link.springer.de/link/service/journals/00778/papers/0008003/00080289.pdf", abstract = "Many applications require the management of spatial data in a multidimensional feature space. Clustering large spatial databases is an important problem, which tries to find the densely populated regions in the feature space to be used in data mining, knowledge discovery, or efficient information retrieval. A good clustering approach should be efficient and detect clusters of arbitrary shape. It must be insensitive to the noise (outliers) and the order of input data. We propose {\em WaveCluster}, a novel clustering approach based on wavelet transforms, which satisfies all the above requirements. Using the multiresolution property of wavelet transforms, we can effectively identify arbitrarily shaped clusters at different degrees of detail. We also demonstrate that {\em WaveCluster\/} is highly efficient in terms of time complexity. 
Experimental results on very large datasets are presented, which show the efficiency and effectiveness of the proposed approach compared to the other recent clustering methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Pacitti:2000:UPS, author = "Esther Pacitti and Eric Simon", title = "Update Propagation Strategies to Improve Freshness in Lazy Master Replicated Databases", journal = j-VLDB-J, volume = "8", number = "3--4", pages = "305--318", month = feb, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:51 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Pacitti:Esther.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Simon:Eric.html; http://link.springer.de/link/service/journals/00778/bibs/0008003/00080305.htm; http://link.springer.de/link/service/journals/00778/papers/0008003/00080305.pdf", abstract = "Many distributed database applications need to replicate data to improve data availability and query response time. The two-phase commit protocol guarantees mutual consistency of replicated data but does not provide good performance. Lazy replication has been used as an alternative solution in several types of applications such as on-line financial transactions and telecommunication systems. In this case, mutual consistency is relaxed and the concept of freshness is used to measure the deviation between replica copies. In this paper, we propose two update propagation strategies that improve freshness. 
Both of them use immediate propagation: updates to a primary copy are propagated towards a slave node as soon as they are detected at the master node without waiting for the commitment of the update transaction. Our performance study shows that our strategies can improve data freshness by up to five times compared with the deferred approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data replication; distributed databases; performance evaluation", } @Article{Liang:2000:OMD, author = "Weifa Liang and Maria E. Orlowska and Jeffrey X. Yu", title = "Optimizing Multiple Dimensional Queries Simultaneously in Multidimensional Databases", journal = j-VLDB-J, volume = "8", number = "3--4", pages = "319--338", month = feb, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:51 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html; http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Liang:Weifa.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/Orlowska:Maria_E=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yu:Jeffrey_X=.html; http://link.springer.de/link/service/journals/00778/bibs/0008003/00080319.htm; http://link.springer.de/link/service/journals/00778/papers/0008003/00080319.pdf", abstract = "Some significant progress related to multidimensional data analysis has been achieved in the past few years, including the design of fast algorithms for computing datacubes, selecting some precomputed group-bys to materialize, and designing efficient storage structures for 
multidimensional data. However, little work has been carried out on multidimensional query optimization issues. Particularly the response time (or evaluation cost) for answering several related dimensional queries simultaneously is crucial to the OLAP applications. Recently, Zhao et al. first exploited this problem by presenting three heuristic algorithms. In this paper we first consider in detail two cases of the problem in which all the queries are either hash-based star joins or index-based star joins only. In the case of the hash-based star join, we devise a polynomial approximation algorithm which delivers a plan whose evaluation cost is $ O(n^\epsilon) $ times the optimal, where $n$ is the number of queries and $ \epsilon $ is a fixed constant with $ 0 < \epsilon \leq 1$. We also present an exponential algorithm which delivers a plan with the optimal evaluation cost. In the case of the index-based star join, we present a heuristic algorithm which delivers a plan whose evaluation cost is $n$ times the optimal, and an exponential algorithm which delivers a plan with the optimal evaluation cost. We then consider a general case in which both hash-based star-join and index-based star-join queries are included. For this case, we give a possible improvement on the work of Zhao et al., based on an analysis of their solutions. We also develop another heuristic and an exact algorithm for the problem. We finally conduct a performance study by implementing our algorithms. The experimental results demonstrate that the solutions delivered for the restricted cases are always within two times of the optimal, which confirms our theoretical upper bounds. Actually these experiments produce much better results than our theoretical estimates. To the best of our knowledge, this is the only development of polynomial algorithms for the first two cases which are able to deliver plans with deterministic performance guarantees in terms of the qualities of the plans generated. 
The previous approaches including that of [ZDNS98] may generate a feasible plan for the problem in these two cases, but they do not provide any performance guarantee, i.e., the plans generated by their algorithms can be arbitrarily far from the optimal one.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data warehousing; MDDBs; multiple dimensional query optimization; OLAP; query modeling", } @Article{Atzeni:2000:DWG, author = "Paolo Atzeni and Alberto O. Mendelzon", title = "Databases and the {Web}: Guest Editorial: Databases and the {Web}", journal = j-VLDB-J, volume = "9", number = "1", pages = "1--1", month = mar, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 27 10:11:55 MDT 2000", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html; http://link.springer.de/link/service/journals/00778/tocs/t0009001.htm; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Atzeni:Paolo.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mendelzon:Alberto_O=.html; http://link.springer.de/link/service/journals/00778/bibs/0009001/00090001.htm; http://link.springer.de/link/service/journals/00778/papers/0009001/00090001.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Atzeni:2000:GE, author = "Paolo Atzeni and Alberto O. 
Mendelzon", title = "Guest editorial", journal = j-VLDB-J, volume = "9", number = "1", pages = "1--1", month = mar, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:52 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chidlovskii:2000:SCW, author = "Boris Chidlovskii and Uwe M. Borghoff", title = "Semantic caching of {Web} queries", journal = j-VLDB-J, volume = "9", number = "1", pages = "2--17", month = mar, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:52 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html; http://link.springer.de/link/service/journals/00778/tocs/t0009001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Borghoff:Uwe_M=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chidlovskii:Boris.html; http://link.springer.de/link/service/journals/00778/bibs/0009001/00090002.htm; http://link.springer.de/link/service/journals/00778/papers/0009001/00090002.pdf", abstract = "In meta-searchers accessing distributed Web-based information repositories, performance is a major issue. Efficient query processing requires an appropriate caching mechanism. Unfortunately, standard page-based as well as tuple-based caching mechanisms designed for conventional databases are not efficient on the Web, where keyword-based querying is often the only way to retrieve data. 
In this work, we study the problem of semantic caching of Web queries and develop a caching mechanism for conjunctive Web queries based on {\em signature files}. Our algorithms cope with both relations of semantic containment and intersection between a query and the corresponding cache items. We also develop the cache replacement strategy to treat situations when cached items differ in size and contribution when providing partial query answers. We report results of experiments and show how the caching mechanism is realized in the Knowledge Broker system.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "experiments; query algorithms; region containment; semantic caching; signature files", } @Article{Gruser:2000:LRT, author = "Jean-Robert Gruser and Louiqa Raschid and Vladimir Zadorozhny and Tao Zhan", title = "Learning response time for {WebSources} using query feedback and application in query optimization", journal = j-VLDB-J, volume = "9", number = "1", pages = "18--37", month = mar, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:52 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html; http://link.springer.de/link/service/journals/00778/tocs/t0009001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Gruser:Jean=Robert.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Raschid:Louiqa.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zadorozhny:Vladimir.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zhan:Tao.html; http://link.springer.de/link/service/journals/00778/bibs/0009001/00090018.htm; 
http://link.springer.de/link/service/journals/00778/papers/0009001/00090018.pdf", abstract = "The rapid growth of the Internet and support for interoperability protocols has increased the number of Web accessible sources, WebSources. Current wrapper mediator architectures need to be extended with a wrapper cost model (WCM) for WebSources that can estimate the response time (delays) to access sources as well as other relevant statistics. In this paper, we present a Web prediction tool (WebPT), a tool that is based on learning using query feedback from WebSources. The WebPT uses dimensions time of day, day, and quantity of data, to learn response times from a particular WebSource, and to predict the expected response time (delay) for some query. Experiment data was collected from several sources, and those dimensions that were significant in estimating the response time were determined. We then trained the WebPT on the collected data, to use the three dimensions mentioned above, and to predict the response time, as well as a confidence in the prediction. We describe the WebPT learning algorithms, and report on the WebPT learning for WebSources. Our research shows that we can improve the quality of learning by tuning the WebPT features, e.g., training the WebPT using a logarithm of the input training data; including significant dimensions in the WebPT; or changing the ordering of dimensions. A comparison of the WebPT with more traditional neural network (NN) learning has been performed, and we briefly report on the comparison. We then demonstrate how the WebPT prediction of delay may be used by a scrambling enabled optimizer. A scrambling algorithm identifies some critical points of delay, where it makes a decision to scramble (modify) a plan, to attempt to hide the expected delay by computing some other part of the plan that is unaffected by the delay. 
We explore the space of real delay at a WebSource, versus the WebPT prediction of this delay, with respect to critical points of delay in specific plans. We identify those cases where WebPT overestimation or underestimation of the real delay results in a penalty in the scrambling enabled optimizer, and those cases where there is no penalty. Using the experimental data and WebPT learning, we test how good the WebPT is in minimizing these penalties.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data-intensive applications on the Web; query languages and systems for Web data", } @Article{Fernandez:2000:DSW, author = "Mary Fern{\'a}ndez and Daniela Florescu and Alon Levy and Dan Suciu", title = "Declarative specification of {Web} sites with {Strudel}", journal = j-VLDB-J, volume = "9", number = "1", pages = "38--55", month = mar, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:52 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html; http://link.springer.de/link/service/journals/00778/tocs/t0009001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Fernandez:Mary_F=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Florescu:Daniela.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Levy:Alon_Y=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Suciu:Dan.html; http://link.springer.de/link/service/journals/00778/bibs/0009001/00090038.htm; http://link.springer.de/link/service/journals/00778/papers/0009001/00090038.pdf", abstract = "Strudel is a system for implementing {\em data-intensive\/} Web sites, which typically integrate
information from multiple data sources and have complex structure. Strudel's key idea is separating the management of a Web site's data, the specification of its content and structure, and the visual representation of its pages. Strudel provides a declarative {\em query language\/} for specifying a site's content and structure, and a simple {\em template language\/} for specifying a site's HTML representation. This paper contains a comprehensive description of the Strudel system and details the benefits of declarative site specification. We describe our experiences using Strudel in a production application and describe three different, but complementary, systems that extend and improve upon Strudel's original ideas.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "declarative query languages; web-site management", xxauthor = "Mary F. Fernandez and Daniela Florescu and Alon Y.
Levy and Dan Suciu", xxtitle = "Declarative Specification of {Web} Sites with {Strudel}", } @Article{Berendt:2000:ANB, author = "Bettina Berendt and Myra Spiliopoulou", title = "Analysis of navigation behaviour in {Web} sites integrating multiple information systems", journal = j-VLDB-J, volume = "9", number = "1", pages = "56--75", month = mar, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:52 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html; http://link.springer.de/link/service/journals/00778/tocs/t0009001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Berendt:Bettina.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Spiliopoulou:Myra.html; http://link.springer.de/link/service/journals/00778/bibs/0009001/00090056.htm; http://link.springer.de/link/service/journals/00778/papers/0009001/00090056.pdf", abstract = "The analysis of web usage has mostly focused on sites composed of conventional static pages. However, huge amounts of information available in the web come from databases or other data collections and are presented to the users in the form of dynamically generated pages. The query interfaces of such sites allow the specification of many search criteria. Their generated results support navigation to pages of results combining cross-linked data from many sources. For the analysis of visitor navigation behaviour in such web sites, we propose the web usage miner (WUM), which discovers navigation patterns subject to advanced statistical and structural constraints. Since our objective is the discovery of interesting navigation patterns, we do not focus on accesses to individual pages. 
Instead, we construct conceptual hierarchies that reflect the query capabilities used in the production of those pages. Our experiments with a real web site that integrates data from multiple databases, the German SchulWeb, demonstrate the appropriateness of WUM in discovering navigation patterns and show how those discoveries can help in assessing and improving the quality of the site.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "conceptual hierarchies; data mining; query capabilities; Web databases; Web query interfaces; Web usage mining", } @Article{Buneman:2000:UQL, author = "Peter Buneman and Mary F. Fernandez and Dan Suciu", title = "{UnQL}: a query language and algebra for semistructured data based on structural recursion", journal = j-VLDB-J, volume = "9", number = "1", pages = "76--110", month = mar, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:52 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html; http://link.springer.de/link/service/journals/00778/tocs/t0009001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Buneman:Peter.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Fernandez:Mary_F=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Suciu:Dan.html; http://link.springer.de/link/service/journals/00778/bibs/0009001/00090076.htm; http://link.springer.de/link/service/journals/00778/papers/0009001/00090076.pdf", abstract = "This paper presents structural recursion as the basis of the syntax and semantics of query languages for semistructured data and XML. 
We describe a simple and powerful query language based on pattern matching and show that it can be expressed using structural recursion, which is introduced as a top-down, recursive function, similar to the way XSL is defined on XML trees. On cyclic data, structural recursion can be defined in two equivalent ways: as a recursive function which evaluates the data top-down and remembers all its calls to avoid infinite loops, or as a bulk evaluation which processes the entire data in parallel using only traditional relational algebra operators. The latter makes it possible for optimization techniques in relational queries to be applied to structural recursion. We show that the composition of two structural recursion queries can be expressed as a single such query, and this is used as the basis of an optimization method for mediator systems. Several other formal properties are established: structural recursion can be expressed in first-order logic extended with transitive closure; its data complexity is PTIME; and over relational data it is a conservative extension of the relational calculus. The underlying data model is based on value equality, formally defined with bisimulation. Structural recursion is shown to be invariant with respect to value equality.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "optimization; query language; semistructured data; structural recursion; XML; XSL", } @Article{Mirbel:2000:CTI, author = "Isabelle Mirbel and Barbara Pernici and Timos K. Sellis and S. 
Tserkezoglou and Michalis Vazirgiannis", title = "Checking the Temporal Integrity of Interactive Multimedia Documents", journal = j-VLDB-J, volume = "9", number = "2", pages = "111--130", month = jul, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:53 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html; http://link.springer.de/link/service/journals/00778/tocs/t0009002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mirbel:Isabelle.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Pernici:Barbara.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sellis:Timos_K=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tserkezoglou:S=.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/v/Vazirgiannis:Michalis.html; http://link.springer.de/link/service/journals/00778/bibs/0009002/00090111.htm; http://link.springer.de/link/service/journals/00778/papers/0009002/00090111.pdf", abstract = "When authoring multimedia scenarios, and in particular scenarios with user interaction, where the sequence and time of occurrence of interactions is not predefined, it is difficult to guarantee the consistency of the resulting scenarios. As a consequence, the {\em execution\/} of the scenario may result in unexpected behavior or inconsistent use of media. The present paper proposes a methodology for checking the temporal integrity of interactive multimedia document (IMD) scenarios at authoring time at various levels. The IMD flow is mainly defined by the events occurring during the IMD session. 
Integrity checking consists of a set of discrete steps, during which we transform the scenario into temporal constraint networks representing the constraints linking the different possible events in the scenario. Temporal constraint verification techniques are applied to verify the integrity of the scenario, deriving a minimal network, showing possible temporal relationships between events given a set of constraints.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "constraint networks; multimedia presentation; temporal integrity", } @Article{Candan:2000:VMM, author = "K. Sel{\c{c}}uk Candan and Eric Lemar and V. S. Subrahmanian", title = "View management in multimedia databases", journal = j-VLDB-J, volume = "9", number = "2", pages = "131--153", month = jul, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:53 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html; http://link.springer.de/link/service/journals/00778/tocs/t0009002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Candan:K=_Sel=ccedil=uk.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lemar:Eric.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Subrahmanian:V=_S=.html; http://link.springer.de/link/service/journals/00778/bibs/0009002/00090131.htm; http://link.springer.de/link/service/journals/00778/papers/0009002/00090131.pdf", abstract = "Though there has been extensive work on multimedia databases in the last few years, there is no prevailing notion of a multimedia view, nor are there techniques to create, manage, and maintain such views.
Visualizing the results of a dynamic multimedia query or materializing a dynamic multimedia view corresponds to assembling and delivering an interactive multimedia presentation in accordance with the visualization specifications. In this paper, we suggest that a non-interactive multimedia presentation is a set of {\em virtual objects\/} with associated spatial and temporal presentation constraints. A virtual object is either an object, or the result of a query. As queries may have different answers at different points in time, scheduling the presentation of such objects is nontrivial. We then develop a probabilistic model of interactive multimedia presentations, extending the non-interactive model described earlier. We also develop a probabilistic model of interactive visualization where the probabilities reflect the user profiles, or the likelihood of certain user interactions. Based on this probabilistic model, we develop three utility-theoretic based types of prefetching algorithms that anticipate how users will interact with the presentation. These prefetching algorithms allow efficient visualization of the query results in accordance with the underlying specification. We have built a prototype system that incorporates these algorithms. 
We report on the results of experiments conducted on top of this implementation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "interactivity; multimedia databases; prefetching; result visualization\slash presentation; view management", } @Article{Fu:2000:DVT, author = "Ada Wai-chee Fu and Polly Mei-shuen Chan and Yin-Ling Cheung and Yiu Sang Moon", title = "Dynamic vp-Tree Indexing for $n$-Nearest Neighbor Search Given Pair-Wise Distances", journal = j-VLDB-J, volume = "9", number = "2", pages = "154--173", month = jul, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:53 MDT 2008", bibsource = "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html; http://link.springer.de/link/service/journals/00778/tocs/t0009002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Electronic edition.", URL = "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chan:Polly_Mei=shuen.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Cheung:Yin=Ling.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Fu:Ada_Wai=Chee.html; http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Moon:Yiu_Sang.html; http://link.springer.de/link/service/journals/00778/bibs/0009002/00090154.htm; http://link.springer.de/link/service/journals/00778/papers/0009002/00090154.pdf", abstract = "For some multimedia applications, it has been found that domain objects cannot be represented as feature vectors in a multidimensional space. Instead, pair-wise distances between data objects are the only input. To support content-based retrieval, one approach maps each object to a $k$ dimensional ($k$ d) point and tries to preserve the distances among the points. 
Then, existing spatial access index methods such as the R-trees and KD-trees can support fast searching on the resulting $k$ d points. However, information loss is inevitable with such an approach since the distances between data objects can only be preserved to a certain extent. Here we investigate the use of a distance-based indexing method. In particular, we apply the vantage point tree (vp-tree) method. There are two important problems for the vp-tree method that warrant further investigation, the $n$ nearest neighbors search and the updating mechanisms. We study an $n$ nearest neighbors search algorithm for the vp-tree, which is shown by experiments to scale up well with the size of the dataset and the desired number of nearest neighbors, $n$. Experiments also show that the searching in the vp-tree is more efficient than that for the $ R^*$-tree and the $M$-tree. Next, we propose solutions for the update problem for the vp-tree, and show by experiments that the algorithms are efficient and effective. Finally, we investigate the problem of selecting vantage-point, propose a few alternative methods, and study their impact on the number of distance computation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "content-based retrieval; indexing; nearest neighbor search; pair-wise distances; updating", } @Article{Atkinson:2000:GE, author = "Malcolm P. 
Atkinson", title = "Guest editorial", journal = j-VLDB-J, volume = "9", number = "3", pages = "175--176", month = dec, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:54 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090175.htm; http://link.springer.de/link/service/journals/00778/papers/0009003/00090175.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bernstein:2000:CBP, author = "Philip A. Bernstein and Shankar Pal and David Shutt", title = "Context-based prefetch --- an optimization for implementing objects on relations", journal = j-VLDB-J, volume = "9", number = "3", pages = "177--189", month = dec, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:54 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090177.htm; http://link.springer.de/link/service/journals/00778/papers/0009003/00090177.pdf", abstract = "When implementing persistent objects on a relational database, a major performance issue is prefetching data to minimize the number of round-trips to the database. This is especially hard with navigational applications, since future accesses are unpredictable. We propose the use of the context in which an object is loaded as a predictor of future accesses, where a context can be a stored collection of relationships, a query result, or a complex object. 
When an object O's state is loaded, similar state for other objects in O's context is prefetched. We present a design for maintaining context and for using it to guide prefetch. We give performance measurements of its implementation in Microsoft Repository, showing up to a 70\% reduction in running time. We describe several variations of the optimization: selectively applying the technique based on application and database characteristics, using application-supplied performance hints, using concurrent database queries to support asynchronous prefetch, prefetching across relationship paths, and delayed prefetch to save database round-trips.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "caching; object-oriented database; object-relational mapping; prefetch", } @Article{Claussen:2000:EES, author = "J. Claussen and A. Kemper and D. Kossmann and C. Wiesner", title = "Exploiting early sorting and early partitioning for decision support query processing", journal = j-VLDB-J, volume = "9", number = "3", pages = "190--213", month = dec, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:54 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090190.htm; http://link.springer.de/link/service/journals/00778/papers/0009003/00090190.pdf", abstract = "Decision support queries typically involve several joins, a grouping with aggregation, and/or sorting of the result tuples. We propose two new classes of query evaluation algorithms that can be used to speed up the execution of such queries. 
The algorithms are based on (1) {\em early sorting\/} and (2) {\em early partitioning\/} --- or a combination of both. The idea is to push the sorting and/or the partitioning to the leaves, i.e., the base relations, of the query evaluation plans (QEPs) and thereby avoid sorting or partitioning large intermediate results generated by the joins. Both early sorting and early partitioning are used in combination with hash-based algorithms for evaluating the join(s) and the grouping. To enable early sorting, the sort order generated at an early stage of the QEP is retained through an arbitrary number of so-called {\em order-preserving hash joins}. To make early partitioning applicable to a large class of decision support queries, we generalize the so-called hash teams proposed by Graefe et al. [GBC98]. Hash teams allow to perform several hash-based operations (join and grouping) on the same attribute in one pass without repartitioning intermediate results. Our generalization consists of indirectly partitioning the input data. Indirect partitioning means partitioning the input data on an attribute that is not directly needed for the next hash-based operation, and it involves the construction of bitmaps to approximate the partitioning for the attribute that is needed in the next hash-based operation. Our performance experiments show that such QEPs based on {\em early sorting, early partitioning}, or both in combination perform significantly better than conventional strategies for many common classes of decision support queries.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "decision support systems; early sorting and partitioning; hash joins and hash teams; performance evaluation; query processing and optimization", } @Article{Jagadish:2000:ODM, author = "H. V. Jagadish and Olga Kapitskaia and Raymond T. 
Ng and Divesh Srivastava", title = "One-dimensional and multi-dimensional substring selectivity estimation", journal = j-VLDB-J, volume = "9", number = "3", pages = "214--230", month = dec, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:54 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090214.htm; http://link.springer.de/link/service/journals/00778/papers/0009003/00090214.pdf", abstract = "With the increasing importance of XML, LDAP directories, and text-based information sources on the Internet, there is an ever-greater need to evaluate queries involving (sub)string matching. In many cases, matches need to be on multiple attributes/dimensions, with correlations between the multiple dimensions. Effective query optimization in this context requires good selectivity estimates. In this paper, we use pruned count-suffix trees (PSTs) as the basic data structure for substring selectivity estimation. For the 1-D problem, we present a novel technique called MO (Maximal Overlap). We then develop and analyze two 1-D estimation algorithms, MOC and MOLC, based on MO and a constraint-based characterization of all possible completions of a given PST. For the $k$-D problem, we first generalize PSTs to multiple dimensions and develop a space- and time-efficient probabilistic algorithm to construct $k$-D PSTs directly. We then show how to extend MO to multiple dimensions. 
Finally, we demonstrate, both analytically and experimentally, that MO is both practical and substantially superior to competing algorithms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "maximal overlap; pruned count-suffix tree; short memory property; string selectivity", } @Article{Manegold:2000:ODA, author = "Stefan Manegold and Peter A. Boncz and Martin L. Kersten", title = "Optimizing database architecture for the new bottleneck: memory access", journal = j-VLDB-J, volume = "9", number = "3", pages = "231--246", month = dec, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:54 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090231.htm; http://link.springer.de/link/service/journals/00778/papers/0009003/00090231.pdf", abstract = "In the past decade, advances in the speed of commodity CPUs have far out-paced advances in memory latency. Main-memory access is therefore increasingly a performance bottleneck for many computer applications, including database systems. In this article, we use a simple scan test to show the severe impact of this bottleneck. The insights gained are translated into guidelines for database architecture, in terms of both data structures and algorithms. We discuss how vertically fragmented data structures optimize cache performance on sequential data access. We then focus on equi-join, typically a random-access operation, and introduce radix algorithms for partitioned hash-join. The performance of these algorithms is quantified using a detailed analytical model that incorporates memory access cost. 
Experiments that validate this model were performed on the Monet database system. We obtained exact statistics on events such as TLB misses and L1 and L2 cache misses by using hardware performance counters found in modern CPUs. Using our cost model, we show how the carefully tuned memory access pattern of our radix algorithms makes them perform well, which is confirmed by experimental results.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "decomposed storage model; implementation techniques; join algorithms; main-memory databases; memory access optimization; query processing", } @Article{Raman:2000:ODR, author = "Vijayshankar Raman and Bhaskaran Raman and Joseph M. Hellerstein", title = "Online dynamic reordering", journal = j-VLDB-J, volume = "9", number = "3", pages = "247--260", month = dec, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:54 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090247.htm; http://link.springer.de/link/service/journals/00778/papers/0009003/00090247.pdf", abstract = "We present a pipelining, dynamically tunable {\em reorder\/} operator for providing user control during long running, data-intensive operations. Users can see partial results and accordingly direct the processing by specifying preferences for various data items; data of interest is prioritized for early processing. The reordering mechanism is efficient and non-blocking and can be used over arbitrary data streams from files and indexes, as well as continuous data feeds. 
We also investigate several policies for the reordering based on the performance goals of various typical applications. We present performance results for reordering in the context of an online aggregation implementation in Informix and in the context of sorting and scrolling in a large-scale spreadsheet. Our experiments demonstrate that for a variety of data distributions and applications, reordering is responsive to dynamic preference changes, imposes minimal overheads in overall completion time, and provides dramatic improvements in the quality of the feedback over time. Surprisingly, preliminary experiments indicate that online reordering can also be useful in traditional batch query processing, because it can serve as a form of pipelined, approximate sorting.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Informix; interactive data processing; online reordering; user control", } @Article{Tan:2000:PEN, author = "Kian-Lee Tan and Cheng Hian Goh and Beng Chin Ooi", title = "Progressive evaluation of nested aggregate queries", journal = j-VLDB-J, volume = "9", number = "3", pages = "261--278", month = dec, year = "2000", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:54 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090261.htm; http://link.springer.de/link/service/journals/00778/papers/0009003/00090261.pdf", abstract = "In many decision-making scenarios, decision makers require rapid feedback to their queries, which typically involve aggregates. The traditional {\em blocking execution model\/} can no longer meet the demands of these users. 
One promising approach in the literature, called {\em online aggregation}, evaluates an aggregation query progressively as follows: as soon as certain data have been evaluated, approximate answers are produced with their respective running confidence intervals; as more data are examined, the answers and their corresponding running confidence intervals are refined. In this paper, we extend this approach to handle nested queries with aggregates (i.e., at least one inner query block is an aggregate query) by providing users with (approximate) answers progressively as the inner aggregation query blocks are evaluated. We address the new issues posed by nested queries. In particular, the answer space begins with a superset of the final answers and is refined as the aggregates from the inner query blocks are refined. For the intermediary answers to be meaningful, they have to be interpreted with the aggregates from the inner queries. We also propose a {\em multi-threaded model\/} in evaluating such queries: each query block is assigned to a thread, and the threads can be evaluated concurrently and independently. The time slice across the threads is {\em nondeterministic\/} in the sense that the user controls the relative rate at which these subqueries are being evaluated. For {\em enumerative\/} nested queries, we propose a priority-based evaluation strategy to present answers that are certainly in the final answer space first, before presenting those whose validity may be affected as the inner query aggregates are refined. We implemented a prototype system using Java and evaluated our system. Results for nested queries with a level and multiple levels of nesting are reported.
Our results show the effectiveness of the proposed mechanisms in providing progressive feedback that reduces the initial waiting time of users significantly without sacrificing the quality of the answers.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "approximate answers; multi-threading; nested aggregate queries; online aggregation; progressive query processing", } @Article{Ngu:2001:CMV, author = "Anne H. H. Ngu and Quan Z. Sheng and Du Q. Huynh and Ron Lei", title = "Combining multi-visual features for efficient indexing in a large image database", journal = j-VLDB-J, volume = "9", number = "4", pages = "279--293", month = apr, year = "2001", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780100028", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:55 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t1009004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/1009004/10090279.htm; http://link.springer.de/link/service/journals/00778/papers/1009004/10090279.pdf", abstract = "The optimized distance-based access methods currently available for multidimensional indexing in multimedia databases have been developed based on two major assumptions: a suitable distance function is known a priori and the dimensionality of the image features is low. It is not trivial to define a distance function that best mimics human visual perception regarding image similarity measurements. Reducing high-dimensional features in images using the popular principal component analysis (PCA) might not always be possible due to the non-linear correlations that may be present in the feature vectors.
We propose in this paper a fast and robust hybrid method for non-linear dimensions reduction of composite image features for indexing in large image database. This method incorporates both the PCA and non-linear neural network techniques to reduce the dimensions of feature vectors so that an optimized access method can be applied. To incorporate human visual perception into our system, we also conducted experiments that involved a number of subjects classifying images into different classes for neural network training. We demonstrate that not only can our neural network system reduce the dimensions of the feature vectors, but that the reduced dimensional feature vectors can also be mapped to an optimized access method for fast and accurate indexing.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "high-dimensional indexing; image retrieval; neural network", } @Article{Combi:2001:HTD, author = "Carlo Combi and Giuseppe Pozzi", title = "{{\em HMAP\/}} --- a temporal data model managing intervals with different granularities and indeterminacy from natural language sentences", journal = j-VLDB-J, volume = "9", number = "4", pages = "294--311", month = apr, year = "2001", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780100033", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:55 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t1009004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/1009004/10090294.htm; http://link.springer.de/link/service/journals/00778/papers/1009004/10090294.pdf", abstract = "The {\em granularity\/} of given temporal information is the level of abstraction at which information is expressed. 
Different units of measure allow one to represent different granularities. Indeterminacy is often present in temporal information given at different granularities: temporal {\em indeterminacy\/} is related to incomplete knowledge of when the considered fact happened. Focusing on temporal databases, different granularities and indeterminacy have to be considered in expressing valid time, i.e., the time at which the information is true in the modeled reality. In this paper, we propose {\em HMAP\/} (The term is the transliteration of an ancient Greek poetical word meaning ``day''.), a temporal data model extending the capability of defining valid times with different granularity and/or with indeterminacy. In {\em HMAP}, absolute intervals are explicitly represented by their {\em start}, {\em end}, and {\em duration\/}: in this way, we can represent valid times as ``in December 1998 for five hours'', ``from July 1995, for 15 days'', ``from March 1997 to October 15, 1997, between 6 and 6:30 p.m.''. {\em HMAP\/} is based on a three-valued logic, for managing uncertainty in temporal relationships. Formulas involving different temporal relationships between intervals, instants, and durations can be defined, allowing one to query the database with different granularities, not necessarily related to that of data. In this paper, we also discuss the complexity of algorithms, allowing us to evaluate {\em HMAP\/} formulas, and show that the formulas can be expressed as constraint networks falling into the class of simple temporal problems, which can be solved in polynomial time.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "temporal databases; three-valued logic; time granularity; time indeterminacy", } @Article{Li:2001:SEM, author = "Wen-Syan Li and K. 
Sel{\c{c}}uk Candan and Kyoji Hirata and Yoshinori Hara", title = "Supporting efficient multimedia database exploration", journal = j-VLDB-J, volume = "9", number = "4", pages = "312--326", month = apr, year = "2001", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780100040", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:55 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t1009004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/1009004/10090312.htm; http://link.springer.de/link/service/journals/00778/papers/1009004/10090312.pdf", abstract = "Due to the fuzziness of query specification and media matching, multimedia retrieval is conducted by way of exploration. It is essential to provide feedback so that users can visualize query reformulation alternatives and database content distribution. Since media matching is an expensive task, another issue is how to efficiently support exploration so that the system is not overloaded by perpetual query reformulation. In this paper, we present a uniform framework to represent statistical information of both semantics and visual metadata for images in the databases. We propose the concept of {\em query verification}, which evaluates queries using statistics, and provides users with feedback, including the strictness and reformulation alternatives of each query condition as well as estimated numbers of matches. With query verification, the system increases the efficiency of the multimedia database exploration for both users and the system. 
Such statistical information is also utilized to support progressive query processing and query relaxation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "exploration; human computer interaction; multimedia database; progressive processing; query relaxation; selectivity statistics", }

@Article{Lee:2001:GTM,
  author =       "Chiang Lee and Chi-Sheng Shih and Yaw-Huei Chen",
  title =        "A graph-theoretic model for optimizing queries involving methods",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "4",
  pages =        "327--343",
  month =        apr,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100035",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:55 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1009004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1009004/10090327.htm; http://link.springer.de/link/service/journals/00778/papers/1009004/10090327.pdf",
  abstract =     "Traditional algorithms for optimizing the execution order of joins are no more valid when selections and projections involve methods and become very expensive operations. Selections and projections could be even more costly than joins such that they are pulled above joins, rather than pushed down in a query tree. In this paper, we take a fundamental look at how to approach query optimization from a top-down design perspective, rather than trying to force one model to fit into another. We present a graph model which is designed to characterize execution plans. Each edge and each vertex of the graph is assigned a weight to model execution plans. We also design algorithms that use these weights to optimize the execution order of operations. A cost model of these algorithms is developed. Experiments are conducted on the basis of this cost model. The results show that our algorithms are superior to similar work proposed in the literature.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "graph model; method query; object-oriented databases; query optimization; spanning tree",
}

@Article{Wang:2001:IVH,
  author =       "Changzhou Wang and X. Sean Wang",
  title =        "Indexing very high-dimensional sparse and quasi-sparse vectors for similarity searches",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "4",
  pages =        "344--361",
  month =        apr,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100036",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:55 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1009004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1009004/10090344.htm; http://link.springer.de/link/service/journals/00778/papers/1009004/10090344.pdf",
  abstract =     "Similarity queries on complex objects are usually translated into searches among their feature vectors. This paper studies indexing techniques for very high-dimensional (e.g., in hundreds) vectors that are sparse or quasi-sparse, i.e., vectors {\em each\/} having only a small number (e.g., ten) of non-zero or significant values. Based on the R-tree, the paper introduces the xS-tree that uses lossy compression of bounding regions to guarantee a reasonable minimum fan-out within the allocated storage space for each node.
In addition, the paper studies the performance and scalability of the xS-tree via experiments.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "high-dimensional indexing structure; lossy compression; quasi-sparse vector; similarity search; sparse vector",
}

@Article{Casati:2001:GE,
  author =       "Fabio Casati and Ming-Chien Shan and Dimitrios Georgakopoulos",
  title =        "Guest editorial",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "1--1",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100041",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100001.htm; http://link.springer.de/link/service/journals/00778/papers/1010001/10100001.pdf",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Mecella:2001:DWC,
  author =       "Massimo Mecella and Barbara Pernici",
  title =        "Designing wrapper components for e-services in integrating heterogeneous systems",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "2--15",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100044",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100002.htm; http://link.springer.de/link/service/journals/00778/papers/1010001/10100002.pdf",
  abstract =     "Component-based approaches are becoming more and more popular to support Internet-based application development. Different component modeling approaches, however, can be adopted, obtaining different abstraction levels (either conceptual or operational). In this paper we present a component-based architecture for the design of e-applications, and discuss the concept of wrapper components as building blocks for the development of e-services, where these services are based on legacy systems. We discuss their characteristics and their applicability in Internet-based application development.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "component; cooperation; e-application; e-service; integration; legacy system; wrapper",
}

@Article{Eyal:2001:ICH,
  author =       "Anat Eyal and Tova Milo",
  title =        "Integrating and customizing heterogeneous e-commerce applications",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "16--38",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100045",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100016.htm; http://link.springer.de/link/service/journals/00778/papers/1010001/10100016.pdf",
  abstract =     "A broad spectrum of electronic commerce applications is currently available on the Web, providing services in almost any area one can think of.
As the number and variety of such applications grow, more business opportunities emerge for providing new services based on the integration and customization of existing applications. (Web shopping malls and support for comparative shopping are just a couple of examples.) Unfortunately, the diversity of applications in each specific domain and the disparity of interfaces, application flows, actor roles in the business transaction, and data formats, renders the integration and manipulation of applications a rather difficult task. In this paper we present the {\em Application Manifold\/} system, aimed at simplifying the intricate task of integration and customization of e-commerce applications. The scope of the work in this paper is limited to web-enabled e-commerce applications. We do not support the integration/customization of proprietary/legacy applications. The wrapping of such applications as web services is complementary to our work. Based on the emerging Web data standard, XML, and application modeling standard, UML, the system offers a novel declarative specification language for describing the integration/customization task, supporting a modular approach where new applications can be added and integrated at will with minimal effort. Then, acting as an application generator, the system generates a full integrated/customized e-commerce application, with the declarativity of the specification allowing for the optimization and verification of the generated application. The integration here deals with the full profile of the given e-commerce applications: the various services offered by the applications, the activities and roles of the different actors participating in the application (e.g., customers, vendors), the application flow, as well as with the data involved in the process. This is in contrast to previous works on Web data integration that focused primarily on querying the data available in the applications, mostly ignoring the additional aspects mentioned above.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "application integration; data integration; electronic commerce",
}

@Article{Bonifati:2001:ARX,
  author =       "Angela Bonifati and Stefano Ceri and Stefano Paraboschi",
  title =        "Active rules for {XML}: a new paradigm for {E}-services",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "39--47",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100039",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100039.htm; http://link.springer.de/link/service/journals/00778/papers/1010001/10100039.pdf",
  abstract =     "XML is rapidly becoming one of the most widely adopted technologies for information exchange and representation. As the use of XML becomes more widespread, we foresee the development of active XML rules, i.e., rules explicitly designed for the management of XML information. In particular, we argue that active rules for XML offer a natural paradigm for the rapid development of innovative e-services. In the paper, we show how active rules can be specified in the context of XSLT, a pattern-based language for publishing XML documents (promoted by the W3C) which is receiving strong commercial support, and Lorel, a query language for XML documents that is quite popular in the research world.
We demonstrate, through simple examples of active rules for XSLT and Lorel, that active rules can be effective for the implementation of e-commerce services. We also discuss the various issues that need to be considered in adapting the notion of relational triggers to the XML context.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "active databases; document management; query languages for XML; XML; XSLT",
}

@Article{Braumandl:2001:OUQ,
  author =       "R. Braumandl and M. Keidl and A. Kemper and D. Kossmann and A. Kreutz and S. Seltzsam and K. Stocker",
  title =        "{ObjectGlobe}: {Ubiquitous} query processing on the {Internet}",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "48--71",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100043",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100048.htm; http://link.springer.de/link/service/journals/00778/papers/1010001/10100048.pdf",
  abstract =     "We present the design of ObjectGlobe, a distributed and open query processor for Internet data sources. Today, data is published on the Internet via Web servers which have, if at all, very localized query processing capabilities. The goal of the ObjectGlobe project is to establish an open marketplace in which {\em data\/} and {\em query processing capabilities\/} can be distributed and used by any kind of Internet application. Furthermore, ObjectGlobe integrates {\em cycle providers\/} (i.e., machines) which carry out query processing operators. The overall picture is to make it possible to execute a query with --- in principle --- unrelated query operators, cycle providers, and data sources. Such an infrastructure can serve as enabling technology for scalable e-commerce applications, e.g., B2B and B2C market places, to be able to integrate data and data processing operations of a large number of participants. One of the main challenges in the design of such an open system is to ensure privacy and security. We discuss the ObjectGlobe security requirements, show how basic components such as the optimizer and runtime system need to be extended, and present the results of performance experiments that assess the additional cost for secure distributed query processing. Another challenge is quality of service management so that users can constrain the costs and running times of their queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "cycle-, function- and data provider; distributed query processing; open systems; privacy; quality of service; query optimization; security",
}

@Article{Su:2001:IBN,
  author =       "Stanley Y. W.
Su and Chunbo Huang and Joachim Hammer and Yihua Huang and Haifei Li and Liu Wang and Youzhong Liu and Charnyote Pluempitiwiriyawej and Minsoo Lee and Herman Lam",
  title =        "An {Internet}-based negotiation server for e-commerce",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "72--90",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100051",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100072.htm; http://link.springer.de/link/service/journals/00778/papers/1010001/10100072.pdf",
  abstract =     "This paper describes the design and implementation of a replicable, Internet-based negotiation server for conducting bargaining-type negotiations between enterprises involved in e-commerce and e-business. Enterprises can be buyers and sellers of products/services or participants of a complex supply chain engaged in purchasing, planning, and scheduling. Multiple copies of our server can be installed to complement the services of Web servers. Each enterprise can install or select a trusted negotiation server to represent his/her interests. Web-based GUI tools are used during the build-time registration process to specify the requirements, constraints, and rules that represent negotiation policies and strategies, preference scoring of different data conditions, and aggregation methods for deriving a global cost-benefit score for the item(s) under negotiation. The registration information is used by the negotiation servers to automatically conduct bargaining type negotiations on behalf of their clients. In this paper, we present the architecture of our implementation as well as a framework for automated negotiations, and describe a number of communication primitives which are used in the underlying negotiation protocol. A constraint satisfaction processor (CSP) is used to evaluate a negotiation proposal or counterproposal against the registered requirements and constraints of a client company. In case of a constraint violation, an event is posted to trigger the execution of negotiation strategic rules, which either automatically relax the violated constraint, ask for human intervention, invoke an application, or perform other remedial operations. An Event-Trigger-Rule (ETR) server is used to manage events, triggers, and rules. Negotiation strategic rules can be added or modified at run-time. A cost-benefit analysis component is used to perform quantitative analysis of alternatives. The use of negotiation servers to conduct automated negotiation has been demonstrated in the context of an integrated supply chain scenario.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "constraint evaluation; cost-benefit analysis; database; e-commerce; negotiation policy and strategy; negotiation protocol",
}

@Article{Shegalov:2001:XEW,
  author =       "German Shegalov and Michael Gillmann and Gerhard Weikum",
  title =        "{XML}-enabled workflow management for e-services across heterogeneous platforms",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "91--103",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100038",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100091.htm; http://link.springer.de/link/service/journals/00778/papers/1010001/10100091.pdf",
  abstract =     "Advanced e-services require efficient, flexible, and easy-to-use workflow technology that integrates well with mainstream Internet technologies such as XML and Web servers. This paper discusses an XML-enabled architecture for distributed workflow management that is implemented in the latest version of our Mentor-lite prototype system. The key asset of this architecture is an XML mediator that handles the exchange of business and flow control data between workflow and business-object servers on the one hand and client activities on the other via XML messages over http. Our implementation of the mediator has made use of Oracle's XSQL servlet. The major benefit of the advocated architecture is that it provides seamless integration of client applications into e-service workflows with scalable efficiency and very little explicit coding, in contrast to an earlier, Java-based, version of our Mentor-lite prototype that required much more code and exhibited potential performance problems.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "business processes; information system interoperability; Internet e-services; workflow management; XML/XSL",
}

@Article{Datta:2001:ASS,
  author =       "Anindya Datta and Kaushik Dutta and Debra VanderMeer and Krithi Ramamritham and Shamkant B.
Navathe",
  title =        "An architecture to support scalable online personalization on the {Web}",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "104--117",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100037",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100104.htm; http://link.springer.de/link/service/journals/00778/papers/1010001/10100104.pdf",
  abstract =     "Online personalization is of great interest to e-companies. Virtually all personalization technologies are based on the idea of storing as much historical customer session data as possible, and then querying the data store as customers navigate through a web site. The holy grail of online personalization is an environment where fine-grained, detailed historical session data can be queried based on current online navigation patterns for use in formulating real-time responses. Unfortunately, as more consumers become e-shoppers, the user load and the amount of historical data continue to increase, causing scalability-related problems for almost all current personalization technologies. This paper chronicles the development of a real-time interaction management system through the integration of historical data and online visitation patterns of e-commerce site visitors. It describes the scientific underpinnings of the system as well as its architecture. Experimental evaluation of the system shows that the caching and storage techniques built into the system deliver performance that is orders of magnitude better than those derived from off-the-shelf database components.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "behavior-based personalization; dynamic lookahead profile; profile caching; scalable online personalization; Web site and interaction model",
}

@Article{ElAbbadi:2001:GE,
  author =       "Amr {El Abbadi} and Gunter Schlageter and Kyu-Young Whang",
  title =        "Guest editorial",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "119--119",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100053",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100119.htm; http://link.springer.de/link/service/journals/00778/papers/1010002/10100119.pdf",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Pucheral:2001:PSD,
  author =       "Philippe Pucheral and Luc Bouganim and Patrick Valduriez and Christophe Bobineau",
  title =        "{PicoDBMS}: {Scaling} down database techniques for the smartcard",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "120--132",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100047",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100120.htm; http://link.springer.de/link/service/journals/00778/papers/1010002/10100120.pdf",
  abstract =     "Smartcards are the most secure portable computing device today. They have been used successfully in applications involving money, and proprietary and personal data (such as banking, healthcare, insurance, etc.). As smartcards get more powerful (with 32-bit CPU and more than 1 MB of stable memory in the next versions) and become multi-application, the need for database management arises. However, smartcards have severe hardware limitations (very slow write, very little RAM, constrained stable memory, no autonomy, etc.) which make traditional database technology irrelevant. The major problem is scaling down database techniques so they perform well under these limitations. In this paper, we give an in-depth analysis of this problem and propose a PicoDBMS solution based on highly compact data structures, query execution without RAM, and specific techniques for atomicity and durability.
We show the effectiveness of our techniques through performance evaluation.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "atomicity; durability; execution model; PicoDBMS; query optimization; smartcard applications; storage model",
}

@Article{Shanmugasundaram:2001:EPR,
  author =       "Jayavel Shanmugasundaram and Eugene Shekita and Rimon Barr and Michael Carey and Bruce Lindsay and Hamid Pirahesh and Berthold Reinwald",
  title =        "Efficiently publishing relational data as {XML} documents",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "133--154",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100052",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100133.htm; http://link.springer.de/link/service/journals/00778/papers/1010002/10100133.pdf",
  abstract =     "XML is rapidly emerging as a standard for exchanging business data on the World Wide Web. For the foreseeable future, however, most business data will continue to be stored in relational database systems. Consequently, if XML is to fulfill its potential, some mechanism is needed to publish relational data as XML documents. Towards that goal, one of the major challenges is finding a way to efficiently structure and tag data from one or more tables as a hierarchical XML document. Different alternatives are possible depending on when this processing takes place and how much of it is done inside the relational engine. In this paper, we characterize and study the performance of these alternatives. Among other things, we explore the use of new scalar and aggregate functions in SQL for constructing complex XML documents directly in the relational engine. We also explore different execution plans for generating the content of an XML document. The results of an experimental study show that constructing XML documents inside the relational engine can have a significant performance benefit. Our results also show the superiority of having the relational engine use what we call an ``outer union plan'' to generate the content of an XML document.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "publishing; relational databases; XML",
}

@Article{Chang:2001:AQM,
  author =       "Kevin Chen-Chuan Chang and H{\'e}ctor Garc{\'\i}a-Molina",
  title =        "Approximate query mapping: {Accounting} for translation closeness",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "155--181",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100042",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100155.htm; http://link.springer.de/link/service/journals/00778/papers/1010002/10100155.pdf",
  abstract =     "In this paper we present a mechanism for approximately translating Boolean query constraints across heterogeneous information sources. Achieving the best translation is challenging because sources support different constraints for formulating queries, and often these constraints cannot be precisely translated.
For instance, a query [score>8] might be ``perfectly'' translated as [rating>0.8] at some site, but can only be approximated as [grade=A] at another. Unlike other work, our general framework adopts a customizable ``closeness'' metric for the translation that combines both precision and recall. Our results show that for query translation we need to handle interdependencies among both query conjuncts as well as disjuncts. As the basis, we identify the essential requirements of a rule system for users to encode the mappings for atomic semantic units. Our algorithm then translates complex queries by rewriting them in terms of the semantic units. We show that, under practical assumptions, our algorithm generates the best approximate translations with respect to the closeness metric of choice. We also present a case study to show how our technique may be applied in practice.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "approximate query translation; closeness; constraint-mapping; information integration; mediators",
}

@Article{Pottinger:2001:MSA,
  author =       "Rachel Pottinger and Alon Halevy",
  title =        "{MiniCon}: a scalable algorithm for answering queries using views",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "182--198",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100048",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100182.htm; http://link.springer.de/link/service/journals/00778/papers/1010002/10100182.pdf",
  abstract =     "The problem of answering queries using views is to find efficient methods of answering a query using a set of previously materialized views over the database, rather than accessing the database relations. The problem has received significant attention because of its relevance to a wide variety of data management problems, such as data integration, query optimization, and the maintenance of physical data independence. To date, the performance of proposed algorithms has received very little attention, and in particular, their scale up in the presence of a large number of views is unknown. We first analyze two previous algorithms, the bucket algorithm and the inverse-rules, and show their deficiencies. We then describe the MiniCon, a novel algorithm for finding the maximally-contained rewriting of a conjunctive query using a set of conjunctive views. We present the first experimental study of algorithms for answering queries using views. The study shows that the MiniCon scales up well and significantly outperforms the previous algorithms. We describe an extension of the MiniCon to handle comparison predicates, and show its performance experimentally.
Finally, we describe how the MiniCon can be extended to the context of query optimization.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "data integration; materialized views; query optimization; Web and databases",
}

@Article{Chakrabarti:2001:AQP,
  author =       "Kaushik Chakrabarti and Minos Garofalakis and Rajeev Rastogi and Kyuseok Shim",
  title =        "Approximate query processing using wavelets",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "199--223",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100049",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100199.htm; http://link.springer.de/link/service/journals/00778/papers/1010002/10100199.pdf",
  abstract =     "Approximate query processing has emerged as a cost-effective approach for dealing with the huge data volumes and stringent response-time requirements of today's decision support systems (DSS). Most work in this area, however, has so far been limited in its query processing scope, typically focusing on specific forms of aggregate queries. Furthermore, conventional approaches based on sampling or histograms appear to be inherently limited when it comes to approximating the results of complex queries over high-dimensional DSS data sets. In this paper, we propose the use of multi-dimensional wavelets as an effective tool for general-purpose approximate query processing in modern, high-dimensional applications. Our approach is based on building {\em wavelet-coefficient synopses\/} of the data and using these synopses to provide approximate answers to queries. We develop novel query processing algorithms that operate directly on the wavelet-coefficient synopses of relational tables, allowing us to process arbitrarily complex queries {\em entirely\/} in the wavelet-coefficient domain. This guarantees extremely fast response times since our approximate query execution engine can do the bulk of its processing over compact sets of wavelet coefficients, essentially postponing the expansion into relational tuples until the end-result of the query. We also propose a novel wavelet decomposition algorithm that can build these synopses in an I/O-efficient manner. Finally, we conduct an extensive experimental study with synthetic as well as real-life data sets to determine the effectiveness of our wavelet-based approach compared to sampling and histograms. Our results demonstrate that our techniques: (1) provide approximate answers of better quality than either sampling or histograms; (2) offer query execution-time speedups of more than two orders of magnitude; and (3) guarantee extremely fast synopsis construction times that scale linearly with the size of the data.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "approximate query answers; data synopses; query processing; wavelet decomposition",
}

@Article{Sarawagi:2001:UCM,
  author =       "Sunita Sarawagi",
  title =        "User-cognizant multidimensional analysis",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "224--239",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100046",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100224.htm; http://link.springer.de/link/service/journals/00778/papers/1010002/10100224.pdf",
  abstract =     "Our goal is to enhance multidimensional database systems with a suite of advanced operators to automate data analysis tasks that are currently handled through manual exploration. In this paper, we present a key component of our system that characterizes the information content of a cell based on a user's prior familiarity with the cube and provides a context-sensitive exploration of the cube. There are three main modules of this component. A Tracker, that continuously tracks the parts of the cube that a user has visited. A Modeler, that pieces together the information in the visited parts to model the user's expected values in the unvisited parts. An Informer, that processes user's queries about the most informative unvisited parts of the cube. The mathematical basis for the expected value modeling is provided by the classical maximum entropy principle. Accordingly, the expected values are computed so as to agree with every value that is already visited while reducing assumptions about unvisited values to the minimum by maximizing their entropy. The most informative values are defined as those that bring the new expected values closest to the actual values.
We believe and prove through experiments that such a user-in-the-loop exploration will enable much faster assimilation of all significant information in the data compared to existing manual explorations.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "maximum entropy; multidimensional data exploration; OLAP; personalized mining; user-sensitive interest measure", } @Article{Turker:2001:SIS, author = "Can T{\"u}rker and Michael Gertz", title = "Semantic integrity support in {SQL:1999} and commercial (object-)relational database management systems", journal = j-VLDB-J, volume = "10", number = "4", pages = "241--269", month = dec, year = "2001", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780100050", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:59 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t1010004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/1010004/10100241.htm; http://link.springer.de/link/service/journals/00778/papers/1010004/10100241.pdf", abstract = "The correctness of the data managed by database systems is vital to any application that utilizes data for business, research, and decision-making purposes. To guard databases against erroneous data not reflecting real-world data or business rules, semantic integrity constraints can be specified during database design. Current commercial database management systems provide various means to implement mechanisms to enforce semantic integrity constraints at database run-time. 
In this paper, we give an overview of the semantic integrity support in the most recent SQL-standard SQL:1999, and we show to what extent the different concepts and language constructs proposed in this standard can be found in major commercial (object-)relational database management systems. In addition, we discuss general design guidelines that point out how the semantic integrity features provided by these systems should be utilized in order to implement an effective integrity enforcing subsystem for a database.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "constraint enforcement; object-relational databases; semantic integrity constraints; SQL:1999", } @Article{Halevy:2001:AQU, author = "Alon Y. Halevy", title = "Answering queries using views: a survey", journal = j-VLDB-J, volume = "10", number = "4", pages = "270--294", month = dec, year = "2001", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780100054", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:59 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t1010004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/1010004/10100270.htm; http://link.springer.de/link/service/journals/00778/papers/1010004/10100270.pdf", abstract = "The problem of answering queries using views is to find efficient methods of answering a query using a set of previously defined materialized views over the database, rather than accessing the database relations. The problem has recently received significant attention because of its relevance to a wide variety of data management problems. 
In query optimization, finding a rewriting of a query using a set of materialized views can yield a more efficient query execution plan. To support the separation of the logical and physical views of data, a storage schema can be described using views over the logical schema. As a result, finding a query execution plan that accesses the storage amounts to solving the problem of answering queries using views. Finally, the problem arises in data integration systems, where data sources can be described as precomputed views over a mediated schema. This article surveys the state of the art on the problem of answering queries using views, and synthesizes the disparate works into a coherent framework. We describe the different applications of the problem, the algorithms proposed to solve it and the relevant theoretical results.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data integration; data warehousing; materialized views; query optimization; survey; Web-site management", } @Article{Laurent:2001:MCI, author = "D. Laurent and J. Lechtenb{\"o}rger and N. Spyratos and G.
Vossen", title = "Monotonic complements for independent data warehouses", journal = j-VLDB-J, volume = "10", number = "4", pages = "295--315", month = dec, year = "2001", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780100055", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:59 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t1010004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/1010004/10100295.htm; http://link.springer.de/link/service/journals/00778/papers/1010004/10100295.pdf", abstract = "Views over databases have regained attention in the context of data warehouses, which are seen as {\em materialized\/} views. In this setting, efficient view maintenance is an important issue, for which the notion of {\em self-maintainability\/} has been identified as desirable. In this paper, we extend the concept of self-maintainability to (query and update) {\em independence\/} within a formal framework, where independence with respect to arbitrary given sets of queries and updates over the sources can be guaranteed. To this end we establish an intuitively appealing connection between warehouse independence and {\em view complements}. Moreover, we study special kinds of complements, namely {\em monotonic complements}, and show how to compute minimal ones in the presence of keys and foreign keys in the underlying databases. 
Taking advantage of these complements, an algorithmic approach is proposed for the specification of independent warehouses with respect to given sets of queries and updates.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data warehouse; independence; materialized view; self-maintainability; view complement", } @Article{Grefen:2001:GTS, author = "Paul Grefen and Jochem Vonk and Peter Apers", title = "Global transaction support for workflow management systems: from formal specification to practical implementation", journal = j-VLDB-J, volume = "10", number = "4", pages = "316--333", month = dec, year = "2001", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780100056", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:59 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t1010004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/1010004/10100316.htm; http://link.springer.de/link/service/journals/00778/papers/1010004/10100316.pdf", abstract = "In this paper, we present an approach to global transaction management in workflow environments. The transaction mechanism is based on the well-known notion of compensation, but extended to deal with both arbitrary process structures to allow cycles in processes and safepoints to allow partial compensation of processes. We present a formal specification of the transaction model and transaction management algorithms in set and graph theory, providing clear, unambiguous transaction semantics. The specification is straightforwardly mapped to a modular architecture, the implementation of which is first applied in a testing environment, then in the prototype of a commercial workflow management system. 
The modular nature of the resulting system allows easy distribution using middleware technology. The path from abstract semantics specification to concrete, real-world implementation of a workflow transaction mechanism is thus covered in a complete and coherent fashion. As such, this paper provides a complete framework for the application of well-founded transactional workflows.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "compensation; long-running transaction; transaction management; workflow management", } @Article{Rahm:2001:SAA, author = "Erhard Rahm and Philip A. Bernstein", title = "A survey of approaches to automatic schema matching", journal = j-VLDB-J, volume = "10", number = "4", pages = "334--350", month = dec, year = "2001", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780100057", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:50:59 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t1010004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/1010004/10100334.htm; http://link.springer.de/link/service/journals/00778/papers/1010004/10100334.pdf", abstract = "Schema matching is a basic problem in many database application domains, such as data integration, E-business, data warehousing, and semantic query processing. In current implementations, schema matching is typically performed manually, which has significant limitations. On the other hand, previous research papers have proposed many techniques to achieve a partial automation of the match operation for specific application domains. We present a taxonomy that covers many of these existing approaches, and we describe the approaches in some detail. 
In particular, we distinguish between schema-level and instance-level, element-level and structure-level, and language-based and constraint-based matchers. Based on our classification we review some previous match implementations thereby indicating which part of the solution space they cover. We intend our taxonomy and review of past work to be useful when comparing different approaches to schema matching, when developing a new match algorithm, and when implementing a schema matching component.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "graph matching; machine learning; model management; schema integration; schema matching", } @Article{Saltenis:2002:INR, author = "Simonas {\v{S}}altenis and Christian S. Jensen", title = "Indexing of now-relative spatio-bitemporal data", journal = j-VLDB-J, volume = "11", number = "1", pages = "1--16", month = aug, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780100058", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:00 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011001/20110001.htm; http://link.springer.de/link/service/journals/00778/papers/2011001/20110001.pdf", abstract = "Real-world entities are inherently spatially and temporally referenced, and database applications increasingly exploit databases that record the past, present, and anticipated future locations of entities, e.g., the residences of customers obtained by the geo-coding of addresses. Indices that efficiently support queries on the spatio-temporal extents of such entities are needed. 
However, past indexing research has progressed in largely separate spatial and temporal streams. Adding time dimensions to spatial indices, as if time were a spatial dimension, neither supports nor exploits the special properties of time. On the other hand, temporal indices are generally not amenable to extension with spatial dimensions. This paper proposes the first efficient and versatile index for a general class of spatio-temporal data: the discretely changing spatial aspect of an object may be a point or may have an extent; both transaction time and valid time are supported, and a generalized notion of the current time, {\em now}, is accommodated for both temporal dimensions. The index is based on the R$^*$-tree and provides means of prioritizing space versus time, which enables it to adapt to spatially and temporally restrictive queries. Performance experiments are reported that evaluate pertinent aspects of the index.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access method; bitemporal data; multidimensional indexing; R-tree; spatio-temporal data; transaction time; valid time", } @Article{Rafiei:2002:ERS, author = "Davood Rafiei and Alberto O. 
Mendelzon", title = "Efficient retrieval of similar shapes", journal = j-VLDB-J, volume = "11", number = "1", pages = "17--27", month = aug, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780100059", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:00 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011001/20110017.htm; http://link.springer.de/link/service/journals/00778/papers/2011001/20110017.pdf", abstract = "We propose an indexing technique for the fast retrieval of objects in 2D images based on similarity between their boundary shapes. Our technique is robust in the presence of noise and supports several important notions of similarity including optimal matches irrespective of variations in orientation and/or position. Our method can also handle size-invariant matches using a normalization technique, although optimality is not guaranteed here. We implemented our method and performed experiments on real (hand-written digits) data. Our experimental results showed the superiority of our method compared to search based on sequential scanning, which is the only obvious competitor. 
The performance gain of our method increases with any increase in the number or the size of shapes.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Fourier descriptors; image databases; shape retrieval; similarity queries; similarity retrieval", } @Article{Navarro:2002:SMS, author = "Gonzalo Navarro", title = "Searching in metric spaces by spatial approximation", journal = j-VLDB-J, volume = "11", number = "1", pages = "28--46", month = aug, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780200060", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:00 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011001/20110028.htm; http://link.springer.de/link/service/journals/00778/papers/2011001/20110028.pdf", abstract = "We propose a new data structure to search in metric spaces. A {\em metric space\/} is formed by a collection of objects and a {\em distance function\/} defined among them which satisfies the triangle inequality. The goal is, given a set of objects and a query, retrieve those objects close enough to the query. The complexity measure is the number of distances computed to achieve this goal. Our data structure, called {\em sa-tree\/} (``spatial approximation tree''), is based on approaching the searched objects spatially, that is, getting closer and closer to them, rather than the classic divide-and-conquer approach of other data structures. We analyze our method and show that the number of distance evaluations to search among $n$ objects is sublinear. 
We show experimentally that the {\em sa-tree\/} is the best existing technique when the metric space is hard to search or the query has low selectivity. These are the most important unsolved cases in real applications. As a practical advantage, our data structure is one of the few that does not need to tune parameters, which makes it appealing for use by non-experts.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "multimedia databases; similarity or proximity search; spatial and multidimensional search; spatial approximation tree", } @Article{Mihaila:2002:LAD, author = "George A. Mihaila and Louiqa Raschid and Anthony Tomasic", title = "Locating and accessing data repositories with {WebSemantics}", journal = j-VLDB-J, volume = "11", number = "1", pages = "47--57", month = aug, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780200061", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:00 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011001/20110047.htm; http://link.springer.de/link/service/journals/00778/papers/2011001/20110047.pdf", abstract = "Many collections of scientific data in particular disciplines are available today on the World Wide Web. Most of these data sources are compliant with some standard for interoperable access. In addition, sources may support a common semantics, i.e., a shared meaning for the data types and their domains. 
However, sharing data among a global community of users is still difficult because of the following reasons: (i) data providers need a mechanism for describing and publishing available sources of data; (ii) data administrators need a mechanism for discovering the location of published sources and obtaining metadata from these sources; and (iii) users need a mechanism for browsing and selecting sources. This paper describes a system, WebSemantics, that accomplishes the above tasks. We describe an architecture for the publication and discovery of scientific data sources, which is an extension of the World Wide Web architecture and protocols. We support catalogs containing metadata about data sources for some application domain. We define a language for discovering sources and querying their metadata. We then describe the WebSemantics prototype.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data discovery; data integration; mediators; query languages; World Wide Web; XML", } @Article{Ferrari:2002:ASD, author = "E. Ferrari and N. R. Adam and V. Atluri and E. Bertino and U. 
Capuozzo", title = "An authorization system for digital libraries", journal = j-VLDB-J, volume = "11", number = "1", pages = "58--67", month = aug, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780200063", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:00 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011001/20110058.htm; http://link.springer.de/link/service/journals/00778/papers/2011001/20110058.pdf", abstract = "Digital Libraries (DLs) introduce several challenging requirements with respect to the formulation, specification, and enforcement of adequate data protection policies. Unlike conventional database environments, a DL environment typically is characterized by a dynamic subject population, often making accesses from remote locations, and by an extraordinarily large amount of multimedia information, stored in a variety of formats. Moreover, in a DL environment, access policies are often specified based on subject qualifications and characteristics, rather than subject identity. Traditional authorization models are not adequate to meet access control requirements of DLs. In this paper, we present a {\em Digital Library Authorization System\/} (DLAS). DLAS employs a content-based authorization model, called a {\em Digital Library Authorization Model\/} (DLAM) which was proposed in previous work [1].", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access control; credentials; digital libraries", } @Article{Marathe:2002:QPT, author = "Arunprasad P. 
Marathe and Kenneth Salem", title = "Query processing techniques for arrays", journal = j-VLDB-J, volume = "11", number = "1", pages = "68--91", month = aug, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780200062", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:00 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011001/20110068.htm; http://link.springer.de/link/service/journals/00778/papers/2011001/20110068.pdf", abstract = "Arrays are a common and important class of data. At present, database systems do not provide adequate array support: arrays can neither be easily defined nor conveniently manipulated. Further, array manipulations are not optimized. This paper describes a language called the {\em Array Manipulation Language\/} (AML), for expressing array manipulations, and a collection of optimization techniques for AML expressions. In the AML framework for array manipulation, arbitrary externally-defined functions can be applied to arrays in a structured manner. AML can be adapted to different application domains by choosing appropriate external function definitions. This paper concentrates on arrays occurring in databases of digital images such as satellite or medical images. AML queries can be treated declaratively and subjected to rewrite optimizations. Rewriting minimizes the number of applications of potentially costly external functions required to compute a query result. AML queries can also be optimized for space. Query results are generated a piece at a time by pipelined execution plans, and the amount of memory required by a plan depends on the order in which pieces are generated. 
An optimizer can consider generating the pieces of the query result in a variety of orders, and can efficiently choose orders that require less space. An AML-based prototype array database system called {\em ArrayDB\/} has been built, and it is used to show the effectiveness of these optimization techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "array manipulation language; array query optimization; declarative query language; memory-usage optimization; pipelined evaluation; user-defined functions", } @Article{Sakurai:2002:SIH, author = "Yasushi Sakurai and Masatoshi Yoshikawa and Shunsuke Uemura and Haruhiko Kojima", title = "Spatial indexing of high-dimensional data based on relative approximation", journal = j-VLDB-J, volume = "11", number = "2", pages = "93--108", month = oct, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0066-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:01 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011002/20110093.htm; http://link.springer.de/link/service/journals/00778/papers/2011002/20110093.pdf", abstract = "We propose a novel index structure, the A-tree (approximation tree), for similarity searches in high-dimensional data. The basic idea of the A-tree is the introduction of virtual bounding rectangles (VBRs) which contain and approximate MBRs or data objects. VBRs can be represented quite compactly and thus affect the tree configuration both quantitatively and qualitatively. First, since tree nodes can contain a large number of VBR entries, fanout becomes large, which increases search speed. 
More importantly, we have a free hand in arranging MBRs and VBRs in the tree nodes. Each A-tree node contains an MBR and its children VBRs. Therefore, by fetching an A-tree node, we can obtain information on the exact position of a parent MBR and the approximate position of its children. We have performed experiments using both synthetic and real data sets. For the real data sets, the A-tree outperforms the SR-tree and the VA-file in all dimensionalities up to 64 dimensions, which is the highest dimension in our experiments. Additionally, we propose a cost model for the A-tree. We verify the validity of the cost model for synthetic and real data sets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "high-dimensional data; relative approximation; similarity search", } @Article{Hjaltason:2002:SCP, author = "Gisli R. Hjaltason and Hanan Samet", title = "Speeding up construction of {PMR} quadtree-based spatial indexes", journal = j-VLDB-J, volume = "11", number = "2", pages = "109--137", month = oct, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0067-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:01 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011002/20110109.htm; http://link.springer.de/link/service/journals/00778/papers/2011002/20110109.pdf", abstract = "Spatial indexes, such as those based on the quadtree, are important in spatial databases for efficient execution of queries involving spatial constraints, especially when the queries involve spatial joins. 
In this paper we present a number of techniques for speeding up the construction of quadtree-based spatial indexes, specifically the PMR quadtree, which can index arbitrary spatial data. We assume a quadtree implementation using the ``linear quadtree'', a disk-resident representation that stores objects contained in the leaf nodes of the quadtree in a linear index (e.g., a B-tree) ordered based on a space-filling curve. We present two complementary techniques: an improved insertion algorithm and a bulk-loading method. The bulk-loading method can be extended to handle bulk-insertions into an existing PMR quadtree. We make some analytical observations about the I/O cost and CPU cost of our PMR quadtree bulk-loading algorithm, and conduct an extensive empirical study of the techniques presented in the paper. Our techniques are found to yield significant speedup compared to traditional quadtree building methods, even when the size of a main memory buffer is very small compared to the size of the resulting quadtrees.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "bulk-loading; I/O; spatial indexing", } @Article{Nanopoulos:2002:ESS, author = "Alexandros Nanopoulos and Yannis Manolopoulos", title = "Efficient similarity search for market basket data", journal = j-VLDB-J, volume = "11", number = "2", pages = "138--152", month = oct, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0068-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:01 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011002/20110138.htm; 
http://link.springer.de/link/service/journals/00778/papers/2011002/20110138.pdf", abstract = "Several organizations have developed very large market basket databases for the maintenance of customer transactions. New applications, e.g., Web recommendation systems, present the requirement for processing similarity queries in market basket databases. In this paper, we propose a novel scheme for similarity search queries in basket data. We develop a new representation method, which, in contrast to existing approaches, is proven to provide correct results. New algorithms are proposed for the processing of similarity queries. Extensive experimental results, for a variety of factors, illustrate the superiority of the proposed scheme over the state-of-the-art method.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data mining; market basket data; nearest-neighbor; similarity search", } @Article{Feng:2002:TMM, author = "Ling Feng and Jeffrey Xu Yu and Hongjun Lu and Jiawei Han", title = "A template model for multidimensional inter-transactional association rules", journal = j-VLDB-J, volume = "11", number = "2", pages = "153--175", month = oct, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0069-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:01 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011002.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011002/20110153.htm; http://link.springer.de/link/service/journals/00778/papers/2011002/20110153.pdf", abstract = "Multidimensional inter-transactional association rules extend the traditional association rules to describe more general associations among items with 
multiple properties across transactions. ``{\em After McDonald and Burger King open branches, KFC will open a branch two months later and one mile away}'' is an example of such rules. Since the number of potential inter-transactional association rules tends to be extremely large, mining inter-transactional associations poses more challenges on efficient processing than mining traditional intra-transactional associations. In order to make such association rule mining truly practical and computationally tractable, in this study we present a template model to help users declare the interesting {\em multidimensional inter-transactional associations\/} to be mined. With the guidance of templates, several optimization techniques, i.e., joining, converging, and speeding, are devised to speed up the discovery of inter-transactional association rules. We show, through a series of experiments on both synthetic and real-life data sets, that these optimization techniques can yield significant performance benefits.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "intra-transactional/inter-transactional association rules; multidimensional context; template model", } @Article{Apers:2002:E, author = "Peter Apers and Stefano Ceri and Richard Snodgrass", title = "Editorial", journal = j-VLDB-J, volume = "11", number = "3", pages = "177--178", month = nov, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0075-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:02 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Special issue VLDB best papers 2001.", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011003/20110177.htm; 
http://link.springer.de/link/service/journals/00778/papers/2011003/20110177.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{An:2002:EPT, author = "Ning An and Sudhanva Gurumurthi and Anand Sivasubramaniam and Narayanan Vijaykrishnan and Mahmut Kandemir and Mary Jane Irwin", title = "Energy-performance trade-offs for spatial access methods on memory-resident data", journal = j-VLDB-J, volume = "11", number = "3", pages = "179--197", month = nov, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0073-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:02 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Special issue VLDB best papers 2001.", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011003/20110179.htm; http://link.springer.de/link/service/journals/00778/papers/2011003/20110179.pdf", abstract = "The proliferation of mobile and pervasive computing devices has brought energy constraints into the limelight. Energy-conscious design is important at all levels of system architecture, and the software has a key role to play in conserving battery energy on these devices. With the increasing popularity of spatial database applications, and their anticipated deployment on mobile devices (such as road atlases and GPS-based applications), it is critical to examine the energy implications of spatial data storage and access methods for memory resident datasets. While there has been extensive prior research on spatial access methods on resource-rich environments, this is, perhaps, the first study to examine their suitability for resource-constrained environments. 
Using a detailed cycle-accurate energy estimation framework and four different datasets, this paper examines the pros and cons of three previously proposed spatial indexing alternatives from both the energy and performance angles. Specifically, the Quadtree, Packed R-tree, and Buddy-Tree structures are evaluated and compared with a brute-force approach that does not use an index. The results show that there are both performance and energy trade-offs between the indexing schemes for the different queries. The nature of the query also plays an important role in determining the energy-performance trade-offs. Further, technological trends and architectural enhancements are influencing factors on the relative behavior of the index structures. The work in the query has a bearing on how and where (on a mobile client or/and on a server) it should be performed for performance and energy savings. The results from this study will be beneficial for the design and implementation of embedded spatial databases, accelerating their deployment on numerous mobile devices.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "energy optimization; multidimensional indexing; resource-constrained computing; spatial data", } @Article{Ailamaki:2002:DPL, author = "Anastassia Ailamaki and David J. DeWitt and Mark D. 
Hill", title = "Data page layouts for relational databases on deep memory hierarchies", journal = j-VLDB-J, volume = "11", number = "3", pages = "198--215", month = nov, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0074-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:02 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Special issue VLDB best papers 2001.", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011003/20110198.htm; http://link.springer.de/link/service/journals/00778/papers/2011003/20110198.pdf", abstract = "Relational database systems have traditionally optimized for I/O performance and organized records sequentially on disk pages using the N-ary Storage Model (NSM) (a.k.a., slotted pages). Recent research, however, indicates that cache utilization and performance is becoming increasingly important on modern platforms. In this paper, we first demonstrate that in-page data placement is the key to high cache performance and that NSM exhibits low cache utilization on modern platforms. Next, we propose a new data organization model called PAX (Partition Attributes Across), that significantly improves cache performance by grouping together all values of each attribute within each page. Because PAX only affects layout inside the pages, it incurs no storage penalty and does not affect I/O behavior. According to our experimental results (which were obtained without using any indices on the participating relations), when compared to NSM: (a) PAX exhibits superior cache and memory bandwidth utilization, saving at least 75\% of NSM's stall time due to data cache accesses; (b) range selection queries and updates on memory-resident relations execute 17--25\% faster; and (c) TPC-H queries involving I/O execute 11--48\% faster.
Finally, we show that PAX performs well across different memory system designs.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "cache-conscious database systems; disk page layout; relational data placement", } @Article{Chirkova:2002:FPV, author = "Rada Chirkova and Alon Y. Halevy and Dan Suciu", title = "A formal perspective on the view selection problem", journal = j-VLDB-J, volume = "11", number = "3", pages = "216--237", month = nov, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0070-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:02 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Special issue VLDB best papers 2001.", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011003/20110216.htm; http://link.springer.de/link/service/journals/00778/papers/2011003/20110216.pdf", abstract = "The view selection problem is to choose a set of views to materialize over a database schema, such that the cost of evaluating a set of workload queries is minimized and such that the views fit into a prespecified storage constraint. The two main applications of the view selection problem are materializing views in a database to speed up query processing, and selecting views to materialize in a data warehouse to answer decision support queries. In addition, view selection is a core problem for intelligent data placement over a wide-area network for data integration applications and data management for ubiquitous computing. We describe several fundamental results concerning the view selection problem. 
We consider the problem for views and workloads that consist of equality-selection, project and join queries, and show that the complexity of the problem depends crucially on the quality of the estimates that a query optimizer has on the size of the views it is considering to materialize. When a query optimizer has good estimates of the sizes of the views, we show a somewhat surprising result, namely, that an optimal choice of views may involve a number of views that is exponential in the size of the database schema. On the other hand, when an optimizer uses standard estimation heuristics, we show that the number of necessary views and the expression size of each view are polynomially bounded.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "materialized views; view selection", } @Article{Aguilera:2002:VLS, author = "Vincent Aguilera and Sophie Cluet and Tova Milo and Pierangelo Veltri and Dan Vodislav", title = "Views in a large-scale {XML} repository", journal = j-VLDB-J, volume = "11", number = "3", pages = "238--255", month = nov, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0065-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:02 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Special issue VLDB best papers 2001.", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011003/20110238.htm; http://link.springer.de/link/service/journals/00778/papers/2011003/20110238.pdf", abstract = "We are interested in defining and querying views in a huge and highly heterogeneous XML repository (Web scale). 
In this context, view definitions are very large, involving lots of sources, and there is no apparent limitation to their size. This raises interesting problems that we address in the paper: (i) how to distribute views over several machines without having a negative impact on the query translation process; (ii) how to quickly select the relevant part of a view given a query; (iii) how to minimize the cost of communicating potentially large queries to the machines where they will be evaluated. The solution that we propose is based on a simple view definition language that allows for automatic generation of views. The language maps paths in the view abstract DTD to paths in the concrete source DTDs. It enables a distributed implementation of the view system that is scalable both in terms of data and load. In particular, the query translation algorithm is shown to have a good (linear) complexity.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "query evaluation; semantic integration; views; warehouse; XML", } @Article{Hunt:2002:DIL, author = "Ela Hunt and Malcolm P. Atkinson and Robert W. 
Irving", title = "Database indexing for large {DNA} and protein sequence collections", journal = j-VLDB-J, volume = "11", number = "3", pages = "256--271", month = nov, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s007780200064", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:02 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011003.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "Special issue VLDB best papers 2001.", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011003/20110256.htm; http://link.springer.de/link/service/journals/00778/papers/2011003/20110256.pdf", abstract = "Our aim is to develop new database technologies for the approximate matching of unstructured string data using indexes. We explore the potential of the suffix tree data structure in this context. We present a new method of building suffix trees, allowing us to build trees in excess of RAM size, which has hitherto not been possible. We show that this method performs in practice as well as the $ O(n) $ method of Ukkonen [70]. Using this method we build indexes for 200 Mb of protein and 300 Mbp of DNA, whose disk-image exceeds the available RAM. We show experimentally that suffix trees can be effectively used in approximate string matching with biological data. For a range of query lengths and error bounds the suffix tree reduces the size of the unoptimised $ O(m n) $ dynamic programming calculation required in the evaluation of string similarity, and the gain from indexing increases with index size. In the indexes we built this reduction is significant, and less than 0.3\% of the expected matrix is evaluated. 
We detail the requirements for further database and algorithmic research to support efficient use of large suffix indexes in biological applications.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "approximate matching; biological sequence; database index; suffix tree", } @Article{Halevy:2002:GE, author = "Alon Y. Halevy", title = "Guest Editorial", journal = j-VLDB-J, volume = "11", number = "4", pages = "273--273", month = dec, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0082-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:03 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110273.htm; http://link.springer.de/link/service/journals/00778/papers/2011004/20110273.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Jagadish:2002:TNX, author = "H. V. Jagadish and S. Al-Khalifa and A. Chapman and L. V. S. Lakshmanan and A. Nierman and S. Paparizos and J. M. Patel and D. Srivastava and N. Wiwatwattana and Y. Wu and C. 
Yu", title = "{TIMBER}: a native {XML} database", journal = j-VLDB-J, volume = "11", number = "4", pages = "274--291", month = dec, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0081-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:03 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110274.htm; http://link.springer.de/link/service/journals/00778/papers/2011004/20110274.pdf", abstract = "This paper describes the overall design and architecture of the Timber XML database system currently being implemented at the University of Michigan. The system is based upon a bulk algebra for manipulating trees, and natively stores XML. New access methods have been developed to evaluate queries in the XML context, and new cost estimation and query optimization techniques have also been developed. We present performance numbers to support some of our design decisions. We believe that the key intellectual contribution of this system is a comprehensive set-at-a-time query processing ability in a native XML store, with all the standard components of relational query processing, including algebraic rewriting and a cost-based optimizer.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "algebra; document management; hierarchical; query processing; semi-structured", } @Article{Fiebig:2002:ANX, author = "T. Fiebig and S. Helmer and C.-C. Kanne and G. Moerkotte and J. Neumann and R. Schiele and T. 
Westmann", title = "Anatomy of a native {XML} base management system", journal = j-VLDB-J, volume = "11", number = "4", pages = "292--314", month = dec, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0080-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:03 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110292.htm; http://link.springer.de/link/service/journals/00778/papers/2011004/20110292.pdf", abstract = "Several alternatives to manage large XML document collections exist, ranging from file systems over relational or other database systems to specifically tailored XML base management systems. In this paper we give a tour of Natix, a database management system designed from scratch for storing and processing XML data. Contrary to the common belief that management of XML data is just another application for traditional databases like relational systems, we illustrate how almost every component in a database system is affected in terms of adequacy and performance. We show how to design and optimize areas such as storage, transaction management --- comprising recovery and multi-user synchronization --- as well as query processing for XML.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database; XML", } @Article{Amer-Yahia:2002:TPQ, author = "S. Amer-Yahia and S. Cho and L. V. S. Lakshmanan and D. 
Srivastava", title = "Tree pattern query minimization", journal = j-VLDB-J, volume = "11", number = "4", pages = "315--331", month = dec, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0076-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:03 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110315.htm; http://link.springer.de/link/service/journals/00778/papers/2011004/20110315.pdf", abstract = "Tree patterns form a natural basis to query tree-structured data such as XML and LDAP. To improve the efficiency of tree pattern matching, it is essential to quickly identify and eliminate redundant nodes in the pattern. In this paper, we study tree pattern minimization both in the absence and in the presence of integrity constraints (ICs) on the underlying tree-structured database. In the absence of ICs, we develop a polynomial-time query minimization algorithm called CIM, whose efficiency stems from two key properties: (i) a node cannot be redundant unless its children are; and (ii) the order of elimination of redundant nodes is immaterial. When ICs are considered for minimization, we develop a technique for query minimization based on three fundamental operations: augmentation (an adaptation of the well-known chase procedure), minimization (based on homomorphism techniques), and reduction. We show the surprising result that the algorithm, referred to as ACIM, obtained by first augmenting the tree pattern using ICs, and then applying CIM, always finds the unique minimal equivalent query. While ACIM is polynomial time, it can be expensive in practice because of its inherent non-locality. 
We then present a fast algorithm, CDM, that identifies and eliminates local redundancies due to ICs, based on propagating ``information labels'' up the tree pattern. CDM can be applied prior to ACIM for improving the minimization efficiency. We complement our analytical results with an experimental study that shows the effectiveness of our tree pattern minimization techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "query minimization; tree patterns; XML", } @Article{Chien:2002:ESM, author = "S.-Y. Chien and V. J. Tsotras and C. Zaniolo", title = "Efficient schemes for managing multiversion {XML} documents", journal = j-VLDB-J, volume = "11", number = "4", pages = "332--353", month = dec, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0079-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:03 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110332.htm; http://link.springer.de/link/service/journals/00778/papers/2011004/20110332.pdf", abstract = "Multiversion support for XML documents is needed in many critical applications, such as software configuration control, cooperative authoring, web information warehouses, and ``e-permanence'' of web documents. In this paper, we introduce efficient and robust techniques for: (i) storing and retrieving; (ii) viewing and exchanging; and (iii) querying multiversion XML documents. We first discuss the limitations of traditional version control methods, such as RCS and SCCS, and then propose novel techniques that overcome their limitations. 
Initially, we focus on the problem of managing secondary storage efficiently, and introduce an {\em edit-based\/} versioning scheme that enhances RCS with an effective clustering policy based on the concept of page-usefulness. The new scheme drastically improves version retrieval at the expense of a small (linear) space overhead. However, the edit-based approach falls short of achieving objectives (ii) and (iii). Therefore, we introduce and investigate a second scheme, which is reference-based and preserves the structure of the original document. In the reference-based approach, a multiversion document can be represented as yet another XML document, which can be easily exchanged and viewed on the web; furthermore, simple queries are also expressed and supported well under this representation. To achieve objective (i), we extend the page-usefulness clustering technique to the reference-based scheme. After characterizing the asymptotic behavior of the new techniques proposed, the paper presents the results of an experimental study evaluating and comparing their performance.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "historical queries; temporal clustering; temporal indexing; version management; XML database", } @Article{Chan:2002:EFX, author = "C.-Y. Chan and P. Felber and M. Garofalakis and R. 
Rastogi", title = "Efficient filtering of {XML} documents with {XPath} expressions", journal = j-VLDB-J, volume = "11", number = "4", pages = "354--379", month = dec, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0077-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:03 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110354.htm; http://link.springer.de/link/service/journals/00778/papers/2011004/20110354.pdf", abstract = "The publish/subscribe paradigm is a popular model for allowing publishers (i.e., data generators) to selectively disseminate data to a large number of widely dispersed subscribers (i.e., data consumers) who have registered their interest in specific information items. Early publish/subscribe systems have typically relied on simple subscription mechanisms, such as keyword or ``bag of words'' matching, or simple comparison predicates on attribute values. The emergence of XML as a standard for information exchange on the Internet has led to an increased interest in using more expressive subscription mechanisms (e.g., based on XPath expressions) that exploit both the structure and the content of published XML documents. Given the increased complexity of these new data-filtering mechanisms, the problem of effectively identifying the subscription profiles that match an incoming XML document poses a difficult and important research challenge. In this paper, we propose a novel index structure, termed XTrie, that supports the efficient filtering of XML documents based on XPath expressions. Our XTrie index structure offers several novel features that, we believe, make it especially attractive for large-scale publish/subscribe systems. 
First, XTrie is designed to support effective filtering based on complex XPath expressions (as opposed to simple, single-path specifications). Second, our XTrie structure and algorithms are designed to support both ordered and unordered matching of XML data. Third, by indexing on sequences of elements organized in a trie structure and using a sophisticated matching algorithm, XTrie is able to both reduce the number of unnecessary index probes as well as avoid redundant matchings, thereby providing extremely efficient filtering. Our experimental results over a wide range of XML document and XPath expression workloads demonstrate that our XTrie index structure outperforms earlier approaches by wide margins.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data dissemination; document filtering; index structure; XML; XPath", } @Article{Ives:2002:XQE, author = "Zachary G. Ives and A. Y. Halevy and D. S. Weld", title = "An {XML} query engine for network-bound data", journal = j-VLDB-J, volume = "11", number = "4", pages = "380--402", month = dec, year = "2002", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0078-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:03 MDT 2008", bibsource = "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110380.htm; http://link.springer.de/link/service/journals/00778/papers/2011004/20110380.pdf", abstract = "XML has become the lingua franca for data exchange and integration across administrative and enterprise boundaries. 
Nearly all data providers are adding XML import or export capabilities, and standard XML Schemas and DTDs are being promoted for all types of data sharing. The ubiquity of XML has removed one of the major obstacles to integrating data from widely disparate sources --- namely, the heterogeneity of data formats. However, general-purpose integration of data across the wide area also requires a query processor that can query data sources on demand, receive streamed XML data from them, and combine and restructure the data into new XML output --- while providing good performance for both batch-oriented and ad hoc, interactive queries. This is the goal of the Tukwila data integration system, the first system that focuses on network-bound, dynamic XML data sources. In contrast to previous approaches, which must read, parse, and often store entire XML objects before querying them, Tukwila can return query results even as the data is streaming into the system. Tukwila is built with a new system architecture that extends adaptive query processing and relational-engine techniques into the XML realm, as facilitated by a pair of operators that incrementally evaluate a query's input path expressions as data is read. In this paper, we describe the Tukwila architecture and its novel aspects, and we experimentally demonstrate that Tukwila provides better overall query performance and faster initial answers than existing systems, and has excellent scalability.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data integration; data streams; query processing; web and databases; XML", } @Article{Ozsu:2003:NPA, author = "M.
Tamer {\"O}zsu", title = "New partnership with {ACM} and update on the journal", journal = j-VLDB-J, volume = "12", number = "1", pages = "1--1", month = may, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0089-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:05 MDT 2008", bibsource = "http://link.springer-ny.com/link/service/journals/UNKNOWN/tocs/t3012001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/3012001/30120001.htm; http://link.springer.de/link/service/journals/00778/papers/3012001/30120001.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sheth:2003:CRK, author = "A. Sheth and S. Thacker and S. Patel", title = "Complex relationships and knowledge discovery support in the {InfoQuilt} system", journal = j-VLDB-J, volume = "12", number = "1", pages = "2--27", month = may, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0071-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:05 MDT 2008", bibsource = "http://link.springer-ny.com/link/service/journals/UNKNOWN/tocs/t3012001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/3012001/30120002.htm; http://link.springer.de/link/service/journals/00778/papers/3012001/30120002.pdf", abstract = "Support for semantic content is becoming more common in Web-accessible information systems. We see this support emerging with the use of ontologies and machine-readable, annotated documents. 
The practice of domain modeling coupled with the extraction of domain-specific, contextually relevant metadata also supports the use of semantics. These advancements enable knowledge discovery approaches that define complex relationships between data that is autonomously collected and managed. The InfoQuilt (One of the incarnations of the InfoQuilt system, as applied to the geographic information as part of the NSF Digital Library II initiative is the ADEPT-UGA system [Ade]. This research was funded in part by National Science Foundation grant IIS-9817432.) system supports one such knowledge discovery approach. This paper presents (parts of) the InfoQuilt system with the focus on its use for modeling and utilizing complex semantic inter-domain relationships to enable human-assisted knowledge discovery over Web-accessible heterogeneous data. This includes the specification and execution of Information Scale (IScapes), a semantically rich information request and correlation mechanism.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Verykios:2003:BDM, author = "V. S. Verykios and G. V. Moustakides and M. G. 
Elfeky", title = "A {Bayesian} decision model for cost optimal record matching", journal = j-VLDB-J, volume = "12", number = "1", pages = "28--40", month = may, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0072-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:05 MDT 2008", bibsource = "http://link.springer-ny.com/link/service/journals/UNKNOWN/tocs/t3012001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/3012001/30120028.htm; http://link.springer.de/link/service/journals/00778/papers/3012001/30120028.pdf", abstract = "In an error-free system with perfectly clean data, the construction of a global view of the data consists of linking --- in relational terms, joining --- two or more tables on their key fields. Unfortunately, most of the time, these data are neither carefully controlled for quality nor necessarily defined commonly across different data sources. As a result, the creation of such a global data view resorts to approximate joins. In this paper, an optimal solution is proposed for the matching or the linking of database record pairs in the presence of inconsistencies, errors or missing values in the data. Existing models for record matching rely on decision rules that minimize the probability of error, that is the probability that a sample (a measurement vector) is assigned to the wrong class. In practice though, minimizing the probability of error is not the best criterion to design a decision rule because the misclassifications of different samples may have different consequences. In this paper we present a decision model that minimizes the cost of making a decision. 
In particular: (a) we present a decision rule; (b) we prove that this rule is optimal with respect to the cost of a decision; and (c) we compute the probabilities of the two types of errors (Type I and Type II) that incur when this rule is applied. We also present a closed form decision model for a certain class of record comparison pairs along with an example, and results from comparing the proposed cost-based model to the error-based model, for large record comparison spaces.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "cost optimal statistical model; data cleaning; record linkage", } @Article{Cui:2003:LTG, author = "Y. Cui and J. Widom", title = "Lineage tracing for general data warehouse transformations", journal = j-VLDB-J, volume = "12", number = "1", pages = "41--58", month = may, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0083-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:05 MDT 2008", bibsource = "http://link.springer-ny.com/link/service/journals/UNKNOWN/tocs/t3012001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/3012001/30120041.htm; http://link.springer.de/link/service/journals/00778/papers/3012001/30120041.pdf", abstract = "Data warehousing systems integrate information from operational data sources into a central repository to enable analysis and mining of the integrated information. During the integration process, source data typically undergoes a series of {\em transformations}, which may vary from simple algebraic operations or aggregations to complex ``data cleansing'' procedures. 
In a warehousing environment, the {\em data lineage\/} problem is that of tracing warehouse data items back to the original source items from which they were derived. We formally define the lineage tracing problem in the presence of general data warehouse transformations, and we present algorithms for lineage tracing in this environment. Our tracing procedures take advantage of known structure or properties of transformations when present, but also work in the absence of such information. Our results can be used as the basis for a lineage tracing tool in a general warehousing setting, and also can guide the design of data warehouses that enable efficient lineage tracing.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data lineage; data warehouse; inverse; lineage tracing; transformation", } @Article{Medjahed:2003:BBI, author = "B. Medjahed and B. Benatallah and A. Bouguettaya and A. H. H. Ngu and A. K. Elmagarmid", title = "Business-to-business interactions: issues and enabling technologies", journal = j-VLDB-J, volume = "12", number = "1", pages = "59--85", month = may, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0087-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:05 MDT 2008", bibsource = "http://link.springer-ny.com/link/service/journals/UNKNOWN/tocs/t3012001.htm; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.de/link/service/journals/00778/bibs/3012001/30120059.htm; http://link.springer.de/link/service/journals/00778/papers/3012001/30120059.pdf", abstract = "Business-to-Business (B2B) technologies pre-date the Web. They have existed for at least as long as the Internet. B2B applications were among the first to take advantage of advances in computer networking. 
The Electronic Data Interchange (EDI) business standard is an illustration of such an early adoption of the advances in computer networking. The ubiquity and the affordability of the Web has made it possible for the masses of businesses to automate their B2B interactions. However, several issues related to scale, content exchange, autonomy, heterogeneity, and other issues still need to be addressed. In this paper, we survey the main techniques, systems, products, and standards for B2B interactions. We propose a set of criteria for assessing the different B2B interaction techniques, standards, and products.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "B2B interactions; components; e-commerce; EDI; Web services; workflows; XML", } @Article{Bernstein:2003:GE, author = "Philip A. Bernstein and Yannis Ioannidis and Raghu Ramakrishnan", title = "Guest editorial", journal = j-VLDB-J, volume = "12", number = "2", pages = "87--88", month = aug, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0092-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:06 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ramamurthy:2003:CFM, author = "Ravishankar Ramamurthy and David J. 
DeWitt and Qi Su", title = "A case for fractured mirrors", journal = j-VLDB-J, volume = "12", number = "2", pages = "89--101", month = aug, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0093-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:06 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The decomposition storage model (DSM) vertically partitions all attributes of a table and has excellent I/O behavior when the number of attributes accessed by a query is small. It also has a better cache footprint than the standard storage model (NSM) used by most database systems. However, DSM incurs a high cost in reconstructing the original tuple from its partitions. We first revisit some of the performance problems associated with DSM and suggest a simple indexing strategy and compare different reconstruction algorithms. Then we propose a new mirroring scheme, termed fractured mirrors, using both NSM and DSM models. This scheme combines the best aspects of both models, along with the added benefit of mirroring to better serve an ad hoc query workload. 
A prototype system has been built using the Shore storage manager, and performance is evaluated using queries from the TPC-H workload.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data placement; disk mirroring; vertical partitioning", } @Article{Chan:2003:RTE, author = "Chee-Yong Chan and Minos Garofalakis and Rajeev Rastogi", title = "{RE}-tree: an efficient index structure for regular expressions", journal = j-VLDB-J, volume = "12", number = "2", pages = "102--119", month = aug, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0094-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:06 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Due to their expressive power, regular expressions (REs) are quickly becoming an integral part of language specifications for several important application scenarios. Many of these applications have to manage huge databases of RE specifications and need to provide an effective matching mechanism that, given an input string, quickly identifies the REs in the database that match it. In this paper, we propose the RE-tree, a novel index structure for large databases of RE specifications. Given an input query string, the RE-tree speeds up the retrieval of matching REs by focusing the search and comparing the input string with only a small fraction of REs in the database. Even though the RE-tree is similar in spirit to other tree-based structures that have been proposed for indexing multidimensional data, RE indexing is significantly more challenging since REs typically represent infinite sets of strings with no well-defined notion of spatial locality. 
To address these new challenges, our RE-tree index structure relies on novel measures for comparing the relative sizes of infinite regular languages. We also propose innovative solutions for the various RE-tree operations including the effective splitting of RE-tree nodes and computing a `tight' bounding RE for a collection of REs. Finally, we demonstrate how sampling-based approximation algorithms can be used to significantly speed up the performance of RE-tree operations. Preliminary experimental results with moderately large synthetic data sets indicate that the RE-tree is effective in pruning the search space and easily outperforms naive sequential search approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "index structure; regular expressions; sampling-based approximations; size measures", } @Article{Abadi:2003:ANM, author = "Daniel J. Abadi and Don Carney and Ugur {\c{C}}etintemel and Mitch Cherniack and Christian Convey and Sangdon Lee and Michael Stonebraker and Nesime Tatbul and Stan Zdonik", title = "{Aurora}: a new model and architecture for data stream management", journal = j-VLDB-J, volume = "12", number = "2", pages = "120--139", month = aug, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0095-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:06 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper describes the basic processing model and architecture of Aurora, a new system to manage data streams for monitoring applications. Monitoring applications differ substantially from conventional business data processing. 
The fact that a software system must process and react to continual inputs from many sources (e.g., sensors) rather than from human operators requires one to rethink the fundamental architecture of a DBMS for this application area. In this paper, we present Aurora, a new DBMS currently under construction at Brandeis University, Brown University, and M.I.T. We first provide an overview of the basic Aurora model and architecture and then describe in detail a stream-oriented set of operators.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "continuous queries; data stream management; database triggers; quality-of-service; real-time systems", } @Article{Chandrasekaran:2003:PSS, author = "Sirish Chandrasekaran and Michael J. Franklin", title = "{PSoup}: a system for streaming queries over streaming data", journal = j-VLDB-J, volume = "12", number = "2", pages = "140--156", month = aug, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0096-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:06 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Recent work on querying data streams has focused on systems where newly arriving data is processed and continuously streamed to the user in real time. In many emerging applications, however, ad hoc queries and/or intermittent connectivity also require the processing of data that arrives prior to query submission or during a period of disconnection. For such applications, we have developed PSoup, a system that combines the processing of ad hoc and continuous queries by treating data and queries symmetrically, allowing new queries to be applied to old data and new data to be applied to old queries. 
PSoup also supports intermittent connectivity by separating the computation of query results from the delivery of those results. PSoup builds on adaptive query-processing techniques developed in the Telegraph project at UC Berkeley. In this paper, we describe PSoup and present experiments that demonstrate the effectiveness of our approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "disconnected operation; query-data duality; stream query processing", } @Article{Agrawal:2003:WRD, author = "Rakesh Agrawal and Peter J. Haas and Jerry Kiernan", title = "Watermarking relational data: framework, algorithms and analysis", journal = j-VLDB-J, volume = "12", number = "2", pages = "157--169", month = aug, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0097-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:06 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We enunciate the need for watermarking database relations to deter data piracy, identify the characteristics of relational data that pose unique challenges for watermarking, and delineate desirable properties of a watermarking system for relational data. We then present an effective watermarking technique geared for relational data. This technique ensures that some bit positions of some of the attributes of some of the tuples contain specific values. The specific bit locations and values are algorithmically determined under the control of a secret key known only to the owner of the data. This bit pattern constitutes the watermark. Only if one has access to the secret key can the watermark be detected with high probability. 
Detecting the watermark requires access neither to the original data nor the watermark, and the watermark can be easily and efficiently maintained in the presence of insertions, updates, and deletions. Our analysis shows that the proposed technique is robust against various forms of malicious attacks as well as benign updates to the data. Using an implementation running on DB2, we also show that the algorithms perform well enough to be used in real-world applications.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database; information hiding; steganography; watermarking", } @Article{Chakrabarti:2003:FAT, author = "Soumen Chakrabarti and Shourya Roy and Mahesh V. Soundalgekar", title = "Fast and accurate text classification via multiple linear discriminant projections", journal = j-VLDB-J, volume = "12", number = "2", pages = "170--185", month = aug, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0098-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:06 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Support vector machines (SVMs) have shown superb performance for text classification tasks. They are accurate, robust, and quick to apply to test instances. Their only potential drawback is their training time and memory requirement. For $n$ training instances held in memory, the best-known SVM implementations take time proportional to $ n^a$, where $a$ is typically between 1.8 and 2.1. SVMs have been trained on data sets with several thousand instances, but Web directories today contain millions of instances that are valuable for mapping billions of Web pages into Yahoo!-like directories. 
We present SIMPL, a nearly linear-time classification algorithm that mimics the strengths of SVMs while avoiding the training bottleneck. It uses Fisher's linear discriminant, a classical tool from statistical pattern recognition, to project training instances to a carefully selected low-dimensional subspace before inducing a decision tree on the projected instances. SIMPL uses efficient sequential scans and sorts and is comparable in speed and memory scalability to widely used naive Bayes (NB) classifiers, but it beats NB accuracy decisively. It not only approaches and sometimes exceeds SVM accuracy, but also beats the running time of a popular SVM implementation by orders of magnitude. While describing SIMPL, we make a detailed experimental comparison of SVM-generated discriminants with Fisher's discriminants, and we also report on an analysis of the cache performance of a popular SVM implementation. Our analysis shows that SIMPL has the potential to be the method of choice for practitioners who want the accuracy of SVMs and the simplicity and speed of naive Bayes classifiers.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "discriminative learning; linear discriminants; text classification", } @Article{Fung:2003:CDV, author = "Chi-Wai Fung and Kamalakar Karlapalem and Qing Li", title = "Cost-driven vertical class partitioning for methods in object oriented databases", journal = j-VLDB-J, volume = "12", number = "3", pages = "187--210", month = oct, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0084-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:07 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In object-oriented databases (OODBs), a method encapsulated in a class 
typically accesses a few, but not all the instance variables defined in the class. It may thus be preferable to vertically partition the class for reducing irrelevant data (instance variables) accessed by the methods. Our prior work has shown that vertical class partitioning can result in a substantial decrease in the total number of disk accesses incurred for executing a set of applications, but coming up with an optimal vertical class partitioning scheme is a hard problem. In this paper, we present two algorithms for deriving optimal and near-optimal vertical class partitioning schemes. The cost-driven algorithm provides the optimal vertical class partitioning schemes by enumerating, exhaustively, all the schemes and calculating the number of disk accesses required to execute a given set of applications. For this, a cost model for executing a set of methods in an OODB system is developed. Since exhaustive enumeration is costly and only works for classes with a small number of instance variables, a hill-climbing heuristic algorithm (HCHA) is developed, which takes the solution provided by the affinity-based algorithm and improves it, thereby further reducing the total number of disk accesses incurred. 
We show that the HCHA algorithm provides a reasonable near-optimal vertical class partitioning scheme for executing a given set of applications.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "affinity-based; analytical cost model; cost-driven; hill-climbing heuristic algorithm; method-induced; object-oriented databases; vertical class partitioning", } @Article{Li:2003:CCA, author = "Chen Li", title = "Computing complete answers to queries in the presence of limited access patterns", journal = j-VLDB-J, volume = "12", number = "3", pages = "211--227", month = oct, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-002-0085-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:07 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In data applications such as information integration, there can be limited access patterns to relations, i.e., binding patterns require values to be specified for certain attributes in order to retrieve data from a relation. As a consequence, we cannot retrieve all tuples from these relations. In this article we study the problem of computing the {\em complete\/} answer to a query, i.e., the answer that could be computed if all the tuples could be retrieved. A query is {\em stable\/} if for any instance of the relations in the query, its complete answer can be computed using the access patterns permitted by the relations. We study the problem of testing stability of various classes of queries, including conjunctive queries, unions of conjunctive queries, and conjunctive queries with arithmetic comparisons. We give algorithms and complexity results for these classes of queries. 
We show that stability of datalog programs is undecidable, and give a sufficient condition for stability of datalog queries. Finally, we study data-dependent computability of the complete answer to a nonstable query, and propose a decision tree for guiding the process to compute the complete answer.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "complete answers to queries; limited access patterns to relations; query stability", } @Article{Chua:2003:IBA, author = "Cecil Eng H. Chua and Roger H. L. Chiang and Ee-Peng Lim", title = "Instance-based attribute identification in database integration", journal = j-VLDB-J, volume = "12", number = "3", pages = "228--243", month = oct, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0088-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:07 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Most research on attribute identification in database integration has focused on integrating attributes using schema and summary information derived from the attribute values. No research has attempted to fully explore the use of attribute values to perform attribute identification. We propose an attribute identification method that employs schema and summary instance information as well as properties of attributes derived from their instances. Unlike other attribute identification methods that match only single attributes, our method matches attribute groups for integration. Because our attribute identification method fully explores data instances, it can identify corresponding attributes to be integrated even when schema information is misleading. Three experiments were performed to validate our attribute identification method. 
In the first experiment, the heuristic rules derived for attribute classification were evaluated on 119 attributes from nine public domain data sets. The second was a controlled experiment validating the robustness of the proposed attribute identification method by introducing erroneous data. The third experiment evaluated the proposed attribute identification method on five data sets extracted from online music stores. The results demonstrated the viability of the proposed method.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "attribute identification; database integration; measures of association", } @Article{Helmer:2003:PSF, author = "Sven Helmer and Guido Moerkotte", title = "A performance study of four index structures for set-valued attributes of low cardinality", journal = j-VLDB-J, volume = "12", number = "3", pages = "244--261", month = oct, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0106-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:07 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The efficient retrieval of data items on set-valued attributes is an important research topic that has attracted little attention so far. We studied and modified four index structures (sequential signature files, signature trees, extendible signature hashing, and inverted files) for a fast retrieval of sets with low cardinality. We compared the index structures by implementing them and subjecting them to extensive experiments, investigating the influence of query set size, database size, domain size, and data distribution (synthetic and real). 
The results of the experiments clearly indicate that inverted files exhibit the best overall behavior of all tested index structures.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access methods; database management systems; index structures; physical design; set-valued attributes", } @Article{Yang:2003:ICM, author = "Jun Yang and Jennifer Widom", title = "Incremental computation and maintenance of temporal aggregates", journal = j-VLDB-J, volume = "12", number = "3", pages = "262--283", month = oct, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0107-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:07 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We consider the problems of computing aggregation queries in temporal databases and of maintaining materialized temporal aggregate views efficiently. The latter problem is particularly challenging since a single data update can cause aggregate results to change over the entire time line. We introduce a new index structure called the {\em SB-tree}, which incorporates features from both {\em segment-trees\/} and {\em B-trees}. SB-trees support fast lookup of aggregate results based on time and can be maintained efficiently when the data change. We extend the basic SB-tree index to handle {\em cumulative\/} (also called {\em moving-window\/}) aggregates, considering separately cases when the window size is or is not fixed in advance. 
For materialized aggregate views in a temporal database or warehouse, we propose building and maintaining SB-tree indices instead of the views themselves.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access methods; aggregation; B-tree; segment tree; temporal database; view maintenance", } @Article{Atluri:2003:GE, author = "Vijay Atluri and Anupam Joshi and Yelena Yesha", title = "Guest editorial", journal = j-VLDB-J, volume = "12", number = "4", pages = "285--285", month = nov, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0109-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Maedche:2003:MMD, author = "A. Maedche and B. Motik and L. Stojanovic", title = "Managing multiple and distributed ontologies on the {Semantic Web}", journal = j-VLDB-J, volume = "12", number = "4", pages = "286--302", month = nov, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0102-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In traditional software systems, significant attention is devoted to keeping modules well separated and coherent with respect to functionality, thus ensuring that changes in the system are localized to a handful of modules. Reuse is seen as the key method in reaching that goal. 
Ontology-based systems on the Semantic Web are just a special class of software systems, so the same principles apply. In this article, we present an integrated framework for managing multiple and distributed ontologies on the Semantic Web. It is based on the representation model for ontologies, trading off between expressivity and tractability. In our framework, we provide features for reusing existing ontologies and for evolving them while retaining the consistency. The approach is implemented within KAON, the Karlsruhe Ontology and Semantic Web tool suite.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "multiple and distributed ontologies; ontology evolution", } @Article{Doan:2003:LMO, author = "AnHai Doan and Jayant Madhavan and Robin Dhamankar and Pedro Domingos and Alon Halevy", title = "Learning to match ontologies on the {Semantic Web}", journal = j-VLDB-J, volume = "12", number = "4", pages = "303--319", month = nov, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0104-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "On the Semantic Web, data will inevitably come from many different ontologies, and information processing across ontologies is not possible without knowing the semantic mappings between them. Manually finding such mappings is tedious, error-prone, and clearly not possible on the Web scale. Hence the development of tools to assist in the ontology mapping process is crucial to the success of the Semantic Web. We describe {\em GLUE}, a system that employs machine learning techniques to find such mappings. 
Given two ontologies, for each concept in one ontology {\em GLUE\/} finds the most similar concept in the other ontology. We give well-founded probabilistic definitions to several practical similarity measures and show that {\em GLUE\/} can work with all of them. Another key feature of {\em GLUE\/} is that it uses multiple learning strategies, each of which exploits well a different type of information either in the data instances or in the taxonomic structure of the ontologies. To further improve matching accuracy, we extend {\em GLUE\/} to incorporate common-sense knowledge and domain constraints into the matching process. Our approach is thus distinguished in that it works with a variety of well-defined similarity notions and that it efficiently incorporates multiple types of knowledge. We describe a set of experiments on several real-world domains and show that {\em GLUE\/} proposes highly accurate semantic mappings. Finally, we extend {\em GLUE\/} to find complex mappings between ontologies and describe experiments that show the promise of the approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "machine learning; ontology matching; relaxation labeling; Semantic Web", } @Article{Halkidi:2003:TOW, author = "Maria Halkidi and Benjamin Nguyen and Iraklis Varlamis and Michalis Vazirgiannis", title = "{THESUS}: Organizing {Web} document collections based on link semantics", journal = j-VLDB-J, volume = "12", number = "4", pages = "320--332", month = nov, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0100-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The requirements for effective search and management of the WWW are 
stronger than ever. Currently Web documents are classified based on their content not taking into account the fact that these documents are connected to each other by links. We claim that a page's classification is enriched by the detection of its incoming links' semantics. This would enable effective browsing and enhance the validity of search results in the WWW context. Another aspect that is underaddressed and strictly related to the tasks of browsing and searching is the similarity of documents at the semantic level. The above observations lead us to the adoption of a hierarchy of concepts (ontology) and a thesaurus to exploit links and provide a better characterization of Web documents. The enhancement of document characterization makes operations such as clustering and labeling very interesting. To this end, we devised a system called THESUS. The system deals with an initial set of Web documents, extracts keywords from all pages' incoming links, and converts them to semantics by mapping them to a domain's ontology. Then a clustering algorithm is applied to discover groups of Web documents. The effectiveness of the clustering process is based on the use of a novel similarity measure between documents characterized by sets of terms. Web documents are organized into thematic subsets based on their semantics. The subsets are then labeled, thereby enabling easier management (browsing, searching, querying) of the Web. In this article, we detail the process of this system and give an experimental analysis of its results.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "document clustering; link analysis; link management; semantics; similarity measure; World Wide Web", } @Article{Medjahed:2003:CWS, author = "Brahim Medjahed and Athman Bouguettaya and Ahmed K.
Elmagarmid", title = "Composing {Web} services on the {Semantic Web}", journal = j-VLDB-J, volume = "12", number = "4", pages = "333--351", month = nov, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0101-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Service composition is gaining momentum as the potential {\em silver bullet\/} for the envisioned {\em Semantic Web}. It purports to take the Web to unexplored efficiencies and provide a flexible approach for promoting all types of activities in tomorrow's Web. Applications expected to heavily take advantage of Web service composition include B2B E-commerce and E-government. To date, enabling composite services has largely been an ad hoc, time-consuming, and error-prone process involving repetitive low-level programming. In this paper, we propose an {\em ontology\/}-based framework for the automatic composition of Web services. We present a technique to generate composite services from high-level declarative descriptions. We define formal safeguards for meaningful composition through the use of {\em composability\/} rules. These rules compare the {\em syntactic\/} and {\em semantic\/} features of Web services to determine whether two services are composable. We provide an implementation using an E-government application offering customized services to indigent citizens. 
Finally, we present an exhaustive performance experiment to assess the scalability of our approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "ontology; Semantic Web; service composition; Web services", } @Article{Fileto:2003:POW, author = "Renato Fileto and Ling Liu and Calton Pu and Eduardo Delgado Assad and Claudia Bauzer Medeiros", title = "{POESIA}: an ontological workflow approach for composing {Web} services in agriculture", journal = j-VLDB-J, volume = "12", number = "4", pages = "352--367", month = nov, year = "2003", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0103-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper describes the POESIA approach to systematic composition of Web services. This pragmatic approach is strongly centered in the use of domain-specific multidimensional ontologies. Inspired by applications needs and founded on ontologies, workflows, and activity models, POESIA provides well-defined operations (aggregation, specialization, and instantiation) to support the composition of Web services. POESIA complements current proposals for Web services definition and composition by providing a higher degree of abstraction with verifiable consistency properties. 
We illustrate the POESIA approach using a concrete application scenario in agroenvironmental planning.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "composition of Web services; data integration; ontologies; Semantic Web; semantics of data and processes", } @Article{Jensen:2004:MDM, author = "Christian S. Jensen and Augustas Kligys and Torben Bach Pedersen and Igor Timko", title = "Multidimensional data modeling for location-based services", journal = j-VLDB-J, volume = "13", number = "1", pages = "1--21", month = jan, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0091-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:09 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the recent and continuing advances in areas such as wireless communications and positioning technologies, mobile, location-based services are becoming possible. Such services deliver location-dependent content to their users. More specifically, these services may capture the movements and requests of their users in multidimensional databases, i.e., data warehouses, and content delivery may be based on the results of complex queries on these data warehouses. Such queries aggregate detailed data in order to find useful patterns, e.g., in the interaction of a particular user with the services. The application of multidimensional technology in this context poses a range of new challenges. The specific challenge addressed here concerns the provision of an appropriate multidimensional data model. 
In particular, the paper extends an existing multidimensional data model and algebraic query language to accommodate spatial values that exhibit partial containment relationships instead of the total containment relationships normally assumed in multidimensional data models. Partial containment introduces imprecision in aggregation paths. The paper proposes a method for evaluating the imprecision of such paths. The paper also offers transformations of dimension hierarchies with partial containment relationships to simple hierarchies, to which existing precomputation techniques are applicable.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data modeling; location-based services; multidimensional data; partial containment", } @Article{Zhang:2004:PMV, author = "Xin Zhang and Lingli Ding and Elke A. Rundensteiner", title = "Parallel multisource view maintenance", journal = j-VLDB-J, volume = "13", number = "1", pages = "22--48", month = jan, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0086-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:09 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In a distributed environment, materialized views are used to integrate data from different information sources and then store them in some centralized location. In order to maintain such materialized views, maintenance queries need to be sent to information sources by the data warehouse management system. Due to the independence of the information sources and the data warehouse, concurrency issues are raised between the maintenance queries and the local update transactions at each information source. 
Recent solutions such as ECA and Strobe tackle such concurrent maintenance, however with the requirement of quiescence of the information sources. SWEEP and POSSE overcome this limitation by decomposing the global maintenance query into smaller subqueries to be sent to every information source and then performing conflict correction locally at the data warehouse. Note that all these previous approaches handle the data updates {\em one at a time}. Hence either some of the information sources or the data warehouse is likely to be idle during most of the maintenance process. In this paper, we propose that a set of updates should be maintained in parallel by several concurrent maintenance processes so that both the information sources as well as the warehouse would be utilized more fully throughout the maintenance process. This parallelism should then improve the overall maintenance performance. For this we have developed a parallel view maintenance algorithm, called PVM, that substantially improves upon the performance of previous maintenance approaches by handling a set of data updates at the same time. The parallel handling of a set of updates is orthogonal to the particular maintenance algorithm applied to the handling of each individual update. In order to perform parallel view maintenance, we have identified two critical issues that must be overcome: (1) detecting maintenance-concurrent data updates in a parallel mode and (2) correcting the problem that the data warehouse commit order may not correspond to the data warehouse update processing order due to parallel maintenance handling. In this work, we provide solutions to both issues. For the former, we insert a middle-layer timestamp assignment module for detecting maintenance-concurrent data updates without requiring any global clock synchronization. For the latter, we introduce the negative counter concept to solve the problem of variant orders of committing effects of data updates to the data warehouse. 
We provide a proof of the correctness of PVM that guarantees that our strategy indeed generates the correct final data warehouse state. We have implemented both SWEEP and PVM in our EVE data warehousing system. Our performance study demonstrates that a manyfold performance improvement is achieved by PVM over SWEEP.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "concurrent data updates; data warehousing; parallel view maintenance; performance evaluation", } @Article{Hristidis:2004:AAA, author = "Vagelis Hristidis and Yannis Papakonstantinou", title = "Algorithms and applications for answering ranked queries using ranked views", journal = j-VLDB-J, volume = "13", number = "1", pages = "49--70", month = jan, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0099-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:09 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Ranked queries return the top objects of a database according to a preference function. We present and evaluate (experimentally and theoretically) a core algorithm that answers ranked queries in an efficient pipelined manner using materialized ranked views. We use and extend the core algorithm in the described PREFER and MERGE systems. PREFER precomputes a set of materialized views that provide guaranteed query performance. We present an algorithm that selects a near optimal set of views under space constraints. We also describe multiple optimizations and implementation aspects of the downloadable version of PREFER. Then we discuss MERGE, which operates at a metabroker and answers ranked queries by retrieving a minimal number of objects from sources that offer ranked queries. 
A speculative version of the pipelining algorithm is described.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "materialization; merge ranked views; ranked queries", } @Article{Khan:2004:REO, author = "Latifur Khan and Dennis McLeod and Eduard Hovy", title = "Retrieval effectiveness of an ontology-based model for information selection", journal = j-VLDB-J, volume = "13", number = "1", pages = "71--85", month = jan, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0105-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:09 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Technology in the field of digital media generates huge amounts of nontextual information, audio, video, and images, along with more familiar textual information. The potential for exchange and retrieval of information is vast and daunting. The key problem in achieving efficient and user-friendly retrieval is the development of a search mechanism to guarantee delivery of minimal irrelevant information (high precision) while insuring relevant information is not overlooked (high recall). The traditional solution employs keyword-based search. The only documents retrieved are those containing user-specified keywords. But many documents convey desired semantic information without containing these keywords. This limitation is frequently addressed through query expansion mechanisms based on the statistical co-occurrence of terms. Recall is increased, but at the expense of deteriorating precision. One can overcome this problem by indexing documents according to context and meaning rather than keywords, although this requires a method of converting words to meanings and the creation of a meaning-based index structure. 
We have solved the problem of an index structure through the design and implementation of a concept-based model using domain-dependent ontologies. An ontology is a collection of concepts and their interrelationships that provide an abstract view of an application domain. With regard to converting words to meaning, the key issue is to identify appropriate concepts that both describe and identify documents as well as language employed in user requests. This paper describes an automatic mechanism for selecting these concepts. An important novelty is a scalable disambiguation algorithm that prunes irrelevant concepts and allows relevant ones to associate with documents and participate in query generation. We also propose an automatic query expansion mechanism that deals with user requests expressed in natural language. This mechanism generates database queries with appropriate and relevant expansion through knowledge encoded in ontology form. Focusing on audio data, we have constructed a demonstration prototype. We have experimentally and analytically shown that our model, compared to keyword search, achieves a significantly higher degree of precision and recall. 
The techniques employed can be applied to the problem of information selection in all media types.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "audio; metadata; ontology; precision; recall; SQL", } @Article{Donderler:2004:RBS, author = "Mehmet Emin D{\"o}nderler and {\"O}zg{\"u}r Ulusoy and Ugur G{\"u}d{\"u}kbay", title = "Rule-based spatiotemporal query processing for video databases", journal = j-VLDB-J, volume = "13", number = "1", pages = "86--103", month = jan, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0114-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:09 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In our earlier work, we proposed an architecture for a Web-based video database management system (VDBMS) providing an integrated support for spatiotemporal and semantic queries. In this paper, we focus on the task of spatiotemporal query processing and also propose an SQL-like video query language that has the capability to handle a broad range of spatiotemporal queries. The language is rule-based in that it allows users to express spatial conditions in terms of Prolog-type predicates. 
Spatiotemporal query processing is carried out in three main stages: query recognition, query decomposition, and query execution.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "content-based retrieval; inference rules; multimedia databases; spatiotemporal query processing; video databases", } @Article{Yu:2004:QHD, author = "Cui Yu and St{\'e}phane Bressan and Beng Chin Ooi and Kian-Lee Tan", title = "Querying high-dimensional data in single-dimensional space", journal = j-VLDB-J, volume = "13", number = "2", pages = "105--119", month = may, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-004-0121-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:10 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we propose a new tunable index scheme, called iMinMax($ \theta $), that maps points in high-dimensional spaces to single-dimensional values determined by their maximum or minimum values among all dimensions. By varying the tuning ``knob'', $ \theta $, we can obtain different families of iMinMax structures that are optimized for different distributions of data sets. The transformed data can then be indexed using existing single-dimensional indexing structures such as the B$^+$-trees. Queries in the high-dimensional space have to be transformed into queries in the single-dimensional space and evaluated there. We present efficient algorithms for evaluating window queries as range queries on the single-dimensional space. We conducted an extensive performance study to evaluate the effectiveness of the proposed schemes. Our results show that iMinMax($ \theta $) outperforms existing techniques, including the Pyramid scheme and VA-file, by a wide margin. 
We then describe how iMinMax could be used in approximate K-nearest neighbor (KNN) search, and we present a comparative study against the recently proposed iDistance, a specialized KNN indexing method.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "edge; high-dimensional data; iMinMax($\theta$); single-dimensional space; window and KNN queries", } @Article{Dori:2004:VVS, author = "Dov Dori", title = "{ViSWeb} --- the {Visual Semantic Web}: unifying human and machine knowledge representations with {Object-Process Methodology}", journal = j-VLDB-J, volume = "13", number = "2", pages = "120--147", month = may, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-004-0120-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:10 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The Visual Semantic Web (ViSWeb) is a new paradigm for enhancing the current Semantic Web technology. Based on Object-Process Methodology (OPM), which enables modeling of systems in a single graphic and textual model, ViSWeb provides for representation of knowledge over the Web in a unified way that caters to human perceptions while also being machine processable. The advantages of the ViSWeb approach include equivalent graphic-text knowledge representation, visual navigability, semantic sentence interpretation, specification of system dynamics, and complexity management. Arguing against the claim that humans and machines need to look at different knowledge representation formats, the principles and basics of various graphic and textual knowledge representations are presented and examined as candidates for ViSWeb foundation. 
Since OPM is shown to be most adequate for the task, ViSWeb is developed as an OPM-based layer on top of XML/RDF/OWL to express knowledge visually and in natural language. Both the graphic and the textual representations are strictly equivalent. Being intuitive yet formal, they are not only understandable to humans but are also amenable to computer processing. The ability to use such bimodal knowledge representation is potentially a major step forward in the evolution of the Semantic Web.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "conceptual graphs; knowledge representation; object-process methodology; Semantic Web; Visual Semantic Web", } @Article{Fu:2004:EHA, author = "Lixin Fu and Sanguthevar Rajasekaran", title = "Evaluating holistic aggregators efficiently for very large datasets", journal = j-VLDB-J, volume = "13", number = "2", pages = "148--161", month = may, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0112-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:10 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In data warehousing applications, numerous OLAP queries involve the processing of holistic aggregators such as computing the ``top $n$,'' median, quantiles, etc. In this paper, we present a novel approach called dynamic bucketing to efficiently evaluate these aggregators. We partition data into equiwidth buckets and further partition dense buckets into subbuckets as needed by allocating and reclaiming memory space. The bucketing process dynamically adapts to the input order and distribution of input datasets. The histograms of the buckets and subbuckets are stored in our new data structure called structure trees. 
A recent selection algorithm based on regular sampling is generalized and its analysis extended. We have also compared our new algorithms with this generalized algorithm and several other recent algorithms. Experimental results show that our new algorithms significantly outperform prior ones not only in the runtime but also in accuracy.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "aggregation; dynamic bucketing; quantiles", } @Article{Rahal:2004:ETU, author = "Amira Rahal and Qiang Zhu and Per-{\AA}ke Larson", title = "Evolutionary techniques for updating query cost models in a dynamic multidatabase environment", journal = j-VLDB-J, volume = "13", number = "2", pages = "162--176", month = may, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0110-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:10 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Deriving local cost models for query optimization in a dynamic multidatabase system (MDBS) is a challenging issue. In this paper, we study how to evolve a query cost model to capture a slowly-changing dynamic MDBS environment so that the cost model is kept up-to-date all the time. Two novel evolutionary techniques, i.e., the shifting method and the block-moving method, are proposed. The former updates a cost model by taking up-to-date information from a new sample query into consideration at each step, while the latter considers a block (batch) of new sample queries at each step. The relevant issues, including derivation of recurrence updating formulas, development of efficient algorithms, analysis and comparison of complexities, and design of an integrated scheme to apply the two methods adaptively, are studied. 
Our theoretical and experimental results demonstrate that the proposed techniques are quite promising in maintaining accurate cost models efficiently for a slowly changing dynamic MDBS environment. Besides the application to MDBSs, the proposed techniques can also be applied to the automatic maintenance of cost models in self-managing database systems.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "cost model; evolutionary technique; multidatabase; query optimization; self-managing database", } @Article{Adi:2004:ASM, author = "Asaf Adi and Opher Etzion", title = "{Amit} --- the situation manager", journal = j-VLDB-J, volume = "13", number = "2", pages = "177--203", month = may, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0108-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:10 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper presents the ``situation manager'', a tool that includes both a language and an efficient runtime execution mechanism aimed at reducing the complexity of active applications. This tool follows the observation that in many cases there is a gap between current tools that enable one to react to a single event (following the ECA: event-condition-action paradigm) and the reality in which a single event may not require any reaction; however, the reaction should be given to patterns over the event history. The concept of {\em situation\/} presented in this paper extends the concept of {\em composite event\/} in its expressive power, flexibility, and usability.
This paper motivates the work, surveys other efforts in this area, and discusses both the language and the execution model.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "active databases; active technology; composite events; high-level languages", } @Article{Freytag:2004:BPV, author = "Johann-Christoph Freytag and Serge Abiteboul and Mike Carey", title = "Best papers of {VLDB} 2003", journal = j-VLDB-J, volume = "13", number = "3", pages = "205--206", month = sep, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-004-0129-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ilyas:2004:STJ, author = "Ihab F. Ilyas and Walid G. Aref and Ahmed K. Elmagarmid", title = "Supporting top-$k$ join queries in relational databases", journal = j-VLDB-J, volume = "13", number = "3", pages = "207--221", month = sep, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-004-0128-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Ranking queries, also known as top-$k$ queries, produce results that are ordered on some computed score. Typically, these queries involve joins, where users are usually interested only in the top-$k$ join results. 
Top-$k$ queries are dominant in many emerging applications, e.g., multimedia retrieval by content, Web databases, data mining, middlewares, and most information retrieval applications. Current relational query processors do not handle ranking queries efficiently, especially when joins are involved. In this paper, we address supporting top-$k$ join queries in relational query processors. We introduce a new rank-join algorithm that makes use of the individual orders of its inputs to produce join results ordered on a user-specified scoring function. The idea is to rank the join results progressively during the join operation. We introduce two physical query operators based on variants of ripple join that implement the rank-join algorithm. The operators are nonblocking and can be integrated into pipelined execution plans. We also propose an efficient heuristic designed to optimize a top-$k$ join query by choosing the best join order. We address several practical issues and optimization heuristics to integrate the new join operators in practical query processors. We implement the new operators inside a prototype database engine based on PREDATOR. 
The experimental evaluation of our approach compares recent algorithms for joining ranked inputs and shows superior performance.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "query operators; rank aggregation; ranking; top-$k$ queries", } @Article{Papadimitriou:2004:AUS, author = "Spiros Papadimitriou and Anthony Brockwell and Christos Faloutsos", title = "Adaptive, unsupervised stream mining", journal = j-VLDB-J, volume = "13", number = "3", pages = "222--239", month = sep, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-004-0130-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Sensor devices and embedded processors are becoming widespread, especially in measurement/monitoring applications. Their limited resources (CPU, memory and/or communication bandwidth, and power) pose some interesting challenges. We need concise, expressive models to represent the important features of the data and that lend themselves to efficient estimation. In particular, under these severe constraints, we want models and estimation methods that (a) require little memory and a single pass over the data, (b) can adapt and handle arbitrary periodic components, and (c) can deal with various types of noise. We propose $ {\mathrm {AWSOM}} $ (Arbitrary Window Stream mOdeling Method), which allows sensors in remote or hostile environments to efficiently and effectively discover interesting patterns and trends. This can be done automatically, i.e., with no prior inspection of the data or any user intervention and expert tuning before or during data gathering.
Our algorithms require limited resources and can thus be incorporated into sensors --- possibly alongside a distributed query processing engine [10,6,27]. Updates are performed in constant time with respect to stream size using logarithmic space. Existing forecasting methods (SARIMA, GARCH, etc.) and ``traditional'' Fourier and wavelet analysis fall short on one or more of these requirements. To the best of our knowledge, $ {\mathrm {AWSOM}} $ is the first framework that combines all of the above characteristics. Experiments on real and synthetic datasets demonstrate that $ {\mathrm {AWSOM}} $ discovers meaningful patterns over long time periods. Thus, the patterns can also be used to make long-range forecasts, which are notoriously difficult to perform. In fact, $ {\mathrm {AWSOM}} $ outperforms manually set up autoregressive models, both in terms of long-term pattern detection and modeling and by at least $ 10 \times $ in resource consumption.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Labrinidis:2004:ETB, author = "Alexandros Labrinidis and Nick Roussopoulos", title = "Exploring the tradeoff between performance and data freshness in database-driven {Web} servers", journal = j-VLDB-J, volume = "13", number = "3", pages = "240--255", month = sep, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-004-0131-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Personalization, advertising, and the sheer volume of online data generate a staggering amount of dynamic Web content. In addition to Web caching, view materialization has been shown to accelerate the generation of dynamic Web content. 
View materialization is an attractive solution as it decouples the serving of access requests from the handling of updates. In the context of the Web, selecting which views to materialize must be decided online and needs to consider both performance and data freshness, which we refer to as the online view selection problem. In this paper, we define data freshness metrics, provide an adaptive algorithm for the online view selection problem that is based on user-specified data freshness requirements, and present experimental results. Furthermore, we examine alternative metrics for data freshness and extend our proposed algorithm to handle multiple users and alternative definitions of data freshness.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{He:2004:AIW, author = "Hai He and Weiyi Meng and Clement Yu and Zonghuan Wu", title = "Automatic integration of {Web} search interfaces with {WISE}-Integrator", journal = j-VLDB-J, volume = "13", number = "3", pages = "256--273", month = sep, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-004-0126-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "An increasing number of databases are becoming Web accessible through form-based search interfaces, and many of these sources are database-driven e-commerce sites. It is a daunting task for users to access numerous Web sites individually to get the desired information. Hence, providing a unified access to multiple e-commerce search engines selling similar products is of great importance in allowing users to search and compare products from multiple sites with ease. 
One key task for providing such a capability is to integrate the Web search interfaces of these e-commerce search engines so that user queries can be submitted against the integrated interface. Currently, integrating such search interfaces is carried out either manually or semiautomatically, which is inefficient and difficult to maintain. In this paper, we present WISE-Integrator --- a tool that performs automatic integration of Web Interfaces of Search Engines. WISE-Integrator explores a rich set of special metainformation that exists in Web search interfaces and uses the information to identify matching attributes from different search interfaces for integration. It also resolves domain differences of matching attributes. In this paper, we also discuss how to automatically extract information from search interfaces that is needed by WISE-Integrator to perform automatic interface integration. Our experimental results, based on 143 real-world search interfaces in four different domains, indicate that WISE-Integrator can achieve high attribute matching accuracy and can produce high-quality integrated search interfaces without human interactions.",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords = "attribute matching; interface extraction; metasearch; schema integration; Web search interface integration",
}

%%% Second author's given name corrected to Ren{\'e}e (the field
%%% previously read ``Ren{\'e} J. Miller'').
@Article{Velegrakis:2004:PMC,
  author = "Yannis Velegrakis and Ren{\'e}e J.
Miller and Lucian Popa",
  title = "Preserving mapping consistency under schema changes",
  journal = j-VLDB-J,
  volume = "13",
  number = "3",
  pages = "274--293",
  month = sep,
  year = "2004",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-004-0136-2",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Mon Jun 23 10:51:11 MDT 2008",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract = "In dynamic environments like the Web, data sources may change not only their data but also their schemas, their semantics, and their query capabilities. When a mapping is left inconsistent by a schema change, it has to be detected and updated. We present a novel framework and a tool (ToMAS) for automatically adapting (rewriting) mappings as schemas evolve. Our approach considers not only local changes to a schema but also changes that may affect and transform many components of a schema. Our algorithm detects mappings affected by structural or constraint changes and generates all the rewritings that are consistent with the semantics of the changed schemas. Our approach explicitly models mapping choices made by a user and maintains these choices, whenever possible, as the schemas and mappings evolve. When there is more than one candidate rewriting, the algorithm may rank them based on how close they are to the semantics of the existing mappings.",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Seventh author's given name restored: Michael J. Carey (the field
%%% previously read ``J. Carey'').
@Article{Florescu:2004:BSX,
  author = "Daniela Florescu and Chris Hillery and Donald Kossmann and Paul Lucas and Fabio Riccardi and Till Westmann and Michael J.
Carey and Arvind Sundararajan",
  title = "The {BEA} streaming {XQuery} processor",
  journal = j-VLDB-J,
  volume = "13",
  number = "3",
  pages = "294--315",
  month = sep,
  year = "2004",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-004-0137-1",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Mon Jun 23 10:51:11 MDT 2008",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract = "This paper describes the design, implementation, and performance characteristics of a commercial XQuery processing engine, the BEA streaming XQuery processor. This XQuery engine was designed to provide high performance for message-processing applications, i.e., for transforming XML data streams. The engine is a central component of the 8.1 release of BEA's WebLogic Integration (WLI) product. The BEA XQuery engine is fully compliant with the August 2002 draft of the W3C XML Query Language specification and we are currently porting it to the latest version of the XQuery language (July 2004). A goal of this paper is to describe how a fully compliant yet efficient XQuery engine has been built from a few relatively simple components and well-understood technologies.",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Second author's given name restored: Joseph M. Hellerstein (the
%%% field previously read ``M. Hellerstein'').
@Article{Gehrke:2004:GES,
  author = "Johannes Gehrke and Joseph M.
Hellerstein",
  title = "{Guest Editorial} to the special issue on data stream processing",
  journal = j-VLDB-J,
  volume = "13",
  number = "4",
  pages = "317--317",
  month = dec,
  year = "2004",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Mon Jun 23 10:51:12 MDT 2008",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Leading given names restored for the first two authors (the field
%%% previously read ``Huai Yang and Li Lee and Wynne Hsu'').
@Article{Yang:2004:FHQ,
  author = "Liang Huai Yang and Mong Li Lee and Wynne Hsu",
  title = "Finding hot query patterns over an {XQuery} stream",
  journal = j-VLDB-J,
  volume = "13",
  number = "4",
  pages = "318--332",
  month = dec,
  year = "2004",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Mon Jun 23 10:51:12 MDT 2008",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract = "Caching query results is one efficient approach to improving the performance of XML management systems. This entails the discovery of frequent XML queries issued by users. In this paper, we model user queries as a stream of XML query pattern trees and mine the frequent query patterns over the query stream. To facilitate the one-pass mining process, we devise a novel data structure called DTS to summarize the pattern trees seen so far. By grouping the incoming pattern trees into batches, we can dynamically mark the active portion of the current batch in DTS and limit the enumeration of candidate trees to only the currently active pattern trees. We also design another summary data structure called ECTree that provides for the incremental computation of the frequent tree patterns over the query stream. Based on the above two constructs, we present two mining algorithms called XQSMinerI and XQSMinerII.
XQSMinerI is fast, but it tends to overestimate, while XQSMinerII adopts a filter-and-refine approach to minimize the amount of overestimation. Experimental results show that the proposed methods are both efficient and scalable and require only small memory footprints.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "frequent pattern mining; pattern tree; stream mining; tree mining; XML query pattern", } @Article{Babcock:2004:OSD, author = "Brian Babcock and Shivnath Babu and Mayur Datar and Rajeev Motwani and Dilys Thomas", title = "Operator scheduling in data stream systems", journal = j-VLDB-J, volume = "13", number = "4", pages = "333--353", month = dec, year = "2004", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:12 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In many applications involving continuous data streams, data arrival is bursty and data rate fluctuates over time. Systems that seek to give rapid or real-time query responses in such an environment must be prepared to deal gracefully with bursts in data arrival without compromising system performance. We discuss one strategy for processing bursty streams --- adaptive, load-aware scheduling of query operators to minimize resource consumption during times of peak load. We show that the choice of an operator scheduling strategy can have significant impact on the runtime system memory usage as well as output latency. Our aim is to design a scheduling strategy that minimizes the maximum runtime system memory while maintaining the output latency within prespecified bounds. 
We first present Chain scheduling, an operator scheduling strategy for data stream systems that is near-optimal in minimizing runtime memory usage for any collection of single-stream queries involving selections, projections, and foreign-key joins with stored relations. Chain scheduling also performs well for queries with sliding-window joins over multiple streams and multiple queries of the above types. However, during bursts in input streams, when there is a buildup of unprocessed tuples, Chain scheduling may lead to high output latency. We study the online problem of minimizing maximum runtime memory, subject to a constraint on maximum latency. We present preliminary observations, negative results, and heuristics for this problem. A thorough experimental evaluation is provided where we demonstrate the potential benefits of Chain scheduling and its different variants, compare it with competing scheduling strategies, and validate our analytical conclusions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data streams; latency; memory management; scheduling", } @Article{Ganguly:2004:TSE, author = "Sumit Ganguly and Minos Garofalakis and Rajeev Rastogi", title = "Tracking set-expression cardinalities over continuous update streams", journal = j-VLDB-J, volume = "13", number = "4", pages = "354--369", month = dec, year = "2004", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:12 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "There is growing interest in algorithms for processing and querying continuous data streams (i.e., data seen only once in a fixed order) with limited memory resources. 
In its most general form, a data stream is actually an update stream, i.e., comprising data-item deletions as well as insertions. Such massive update streams arise naturally in several application domains (e.g., monitoring of large IP network installations or processing of retail-chain transactions). Estimating the cardinality of set expressions defined over several (possibly distributed) update streams is perhaps one of the most fundamental query classes of interest; as an example, such a query may ask ``what is the number of distinct IP source addresses seen in passing packets from both router $ R_1 $ and $ R_2 $ but not router $ R_3 $?''. Earlier work only addressed very restricted forms of this problem, focusing solely on the special case of insert-only streams and specific operators (e.g., union). In this paper, we propose the first space-efficient algorithmic solution for estimating the cardinality of full-fledged set expressions over general update streams. Our estimation algorithms are probabilistic in nature and rely on a novel, hash-based synopsis data structure, termed ``2-level hash sketch''. We demonstrate how our 2-level hash sketch synopses can be used to provide low-error, high-confidence estimates for the cardinality of set expressions (including operators such as set union, intersection, and difference) over continuous update streams, using only space that is significantly sublinear in the sizes of the streaming input (multi-)sets. Furthermore, our estimators never require rescanning or resampling of past stream items, regardless of the number of deletions in the stream. We also present lower bounds for the problem, demonstrating that the space usage of our estimation algorithms is within small factors of the optimal.
Finally, we propose an optimized, time-efficient stream synopsis (based on 2-level hash sketches) that provides similar, strong accuracy-space guarantees while requiring only guaranteed logarithmic maintenance time per update, thus making our methods applicable for truly rapid-rate data streams. Our results from an empirical study of our synopsis and estimation techniques verify the effectiveness of our approach.",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords = "approximate query processing; data streams; data synopses; randomized algorithms; set expressions",
}

%%% Fourth author's accent corrected from a macron (\=g) to a breve
%%% (\u{g}), the accent used in the Turkish name Ugur.
@Article{Balakrishnan:2004:RA,
  author = "Hari Balakrishnan and Magdalena Balazinska and Don Carney and U{\u{g}}ur {\c{C}}etintemel and Mitch Cherniack and Christian Convey and Eddie Galvez and Jon Salz and Michael Stonebraker and Nesime Tatbul and Richard Tibbetts and Stan Zdonik",
  title = "Retrospective on {Aurora}",
  journal = j-VLDB-J,
  volume = "13",
  number = "4",
  pages = "370--383",
  month = dec,
  year = "2004",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Mon Jun 23 10:51:12 MDT 2008",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract = "This experience paper summarizes the key lessons we learned throughout the design and implementation of the Aurora stream-processing engine. For the past 2 years, we have built five stream-based applications using Aurora. We first describe in detail these applications and their implementation in Aurora. We then reflect on the design of Aurora based on this experience.
Finally, we discuss our initial ideas on a follow-on project, called Borealis, whose goal is to eliminate the limitations of Aurora as well as to address new key challenges and applications in the stream-processing domain.",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords = "data stream management; distributed stream processing; monitoring applications; quality-of-service; stream-processing engines",
}

%%% Given names restored for the first and last authors (the field
%%% previously read ``A. Sharaf ... and K. Chrysanthis'').
@Article{Sharaf:2004:BEE,
  author = "Mohamed A. Sharaf and Jonathan Beaver and Alexandros Labrinidis and Panos K. Chrysanthis",
  title = "Balancing energy efficiency and quality of aggregate data in sensor networks",
  journal = j-VLDB-J,
  volume = "13",
  number = "4",
  pages = "384--403",
  month = dec,
  year = "2004",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Mon Jun 23 10:51:12 MDT 2008",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract = "In-network aggregation has been proposed as one method for reducing energy consumption in sensor networks. In this paper, we explore two ideas related to further reducing energy consumption in the context of in-network aggregation. The first is by influencing the construction of the routing trees for sensor networks with the goal of reducing the size of transmitted data. To this end, we propose a group-aware network configuration method that ``clusters'' along the same path sensor nodes that belong to the same group. The second idea involves imposing a hierarchy of output filters on the sensor network with the goal of both reducing the size of transmitted data and minimizing the number of transmitted messages.
More specifically, we propose a framework to use temporal coherency tolerances in conjunction with in-network aggregation to save energy at the sensor nodes while maintaining specified quality of data. These tolerances are based on user preferences or can be dictated by the network in cases where the network cannot support the current tolerance level. Our framework, called TiNA, works on top of existing in-network aggregation schemes. We evaluate experimentally our proposed schemes in the context of existing in-network aggregation schemes. We present experimental results measuring energy consumption, response time, and quality of data for Group-By queries. Overall, our schemes provide significant energy savings with respect to communication and a negligible drop in quality of data.",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords = "in-network query processing; power-aware computing; semantic routing; sensor networks",
}

%%% Leading initial restored: M. Tamer {\"O}zsu (the field previously
%%% read ``Tamer {\"O}zsu'').
@Article{Ozsu:2005:E,
  author = "M. Tamer {\"O}zsu",
  title = "Editorial",
  journal = j-VLDB-J,
  volume = "14",
  number = "1",
  pages = "1--1",
  month = mar,
  year = "2005",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Mon Jun 23 10:51:14 MDT 2008",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Given names restored for the second through fourth authors (the
%%% field previously gave only their middle initials and surnames).
@Article{Gao:2005:JOT,
  author = "Dengfeng Gao and Christian S. Jensen and Richard T. Snodgrass and Michael D.
Soo", title = "Join operations in temporal databases", journal = j-VLDB-J, volume = "14", number = "1", pages = "2--29", month = mar, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:14 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Joins are arguably the most important relational operators. Poor implementations are tantamount to computing the Cartesian product of the input relations. In a temporal database, the problem is more acute for two reasons. First, conventional techniques are designed for the evaluation of joins with equality predicates rather than the inequality predicates prevalent in valid-time queries. Second, the presence of temporally varying data dramatically increases the size of a database. These factors indicate that specialized techniques are needed to efficiently evaluate temporal joins. We address this need for efficient join evaluation in temporal databases. Our purpose is twofold. We first survey all previously proposed temporal join operators. While many temporal join operators have been defined in previous work, this work has been done largely in isolation from competing proposals, with little, if any, comparison of the various operators. We then address evaluation algorithms, comparing the applicability of various algorithms to the temporal join operators and describing a performance study involving algorithms for one important operator, the temporal equijoin. Our focus, with respect to implementation, is on non-index-based join algorithms. 
Such algorithms do not rely on auxiliary access paths but may exploit sort orderings to achieve efficiency.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "attribute skew; interval join; partition join; sort-merge join; temporal Cartesian product; temporal join; timestamp skew", } @Article{Balmin:2005:SQX, author = "Andrey Balmin and Yannis Papakonstantinou", title = "Storing and querying {XML} data using denormalized relational databases", journal = j-VLDB-J, volume = "14", number = "1", pages = "30--49", month = mar, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:14 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "XML database systems emerge as a result of the acceptance of the XML data model. Recent works have followed the promising approach of building XML database management systems on underlying RDBMS's. Achieving query processing performance reduces to two questions: (i) How should the XML data be decomposed into data that are stored in the RDBMS? (ii) How should the XML query be translated into an efficient plan that sends one or more SQL queries to the underlying RDBMS and combines the data into the XML result? We provide a formal framework for XML Schema-driven decompositions, which encompasses the decompositions proposed in prior work and extends them with decompositions that employ denormalized tables and binary-coded XML fragments. We provide corresponding query processing algorithms that translate the XML query conditions into conditions on the relational tables and assemble the decomposed data into the XML query result. Our key performance focus is the response time for delivering the first results of a query. 
The most effective of the described decompositions have been implemented in XCacheDB, an XML DBMS built on top of a commercial RDBMS, which serves as our experimental basis. We present experiments and analysis that point to a class of decompositions, called inlined decompositions, that improve query performance for full results and first results, without significant increase in the size of the database.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gal:2005:FME, author = "Avigdor Gal and Ateret Anaby-Tavor and Alberto Trombetta and Danilo Montesi", title = "A framework for modeling and evaluating automatic semantic reconciliation", journal = j-VLDB-J, volume = "14", number = "1", pages = "50--67", month = mar, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:14 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The introduction of the Semantic Web vision and the shift toward machine understandable Web resources has unearthed the importance of automatic semantic reconciliation. Consequently, new tools for automating the process were proposed. In this work we present a formal model of semantic reconciliation and analyze in a systematic manner the properties of the process outcome, primarily the inherent uncertainty of the matching process and how it reflects on the resulting mappings. An important feature of this research is the identification and analysis of factors that impact the effectiveness of algorithms for automatic semantic reconciliation, leading, it is hoped, to the design of better algorithms by reducing the uncertainty of existing algorithms. Against this background we empirically study the aptitude of two algorithms to correctly match concepts. 
This research is both timely and practical in light of recent attempts to develop and utilize methods for automatic semantic reconciliation.",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords = "mapping; ontology versioning; semantic interoperability",
}

%%% Given names restored for the first two authors (the field
%%% previously read ``Y. Halevy and G. Ives'').
@Article{Halevy:2005:SML,
  author = "Alon Y. Halevy and Zachary G. Ives and Dan Suciu and Igor Tatarinov",
  title = "Schema mediation for large-scale semantic data sharing",
  journal = j-VLDB-J,
  volume = "14",
  number = "1",
  pages = "68--83",
  month = mar,
  year = "2005",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Mon Jun 23 10:51:14 MDT 2008",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract = "Intuitively, data management and data integration tools should be well suited for exchanging information in a semantically meaningful way. Unfortunately, they suffer from two significant problems: they typically require a common and comprehensive schema design before they can be used to store or share information, and they are difficult to extend because schema evolution is heavyweight and may break backward compatibility. As a result, many large-scale data sharing tasks are more easily facilitated by non-database-oriented tools that have little support for semantics. The goal of the peer data management system (PDMS) is to address this need: we propose the use of a decentralized, easily extensible data management architecture in which any user can contribute new data, schema information, or even mappings between other peers' schemas. PDMSs represent a natural step beyond data integration systems, replacing their single logical schema with an interlinked collection of semantic mappings between peers' individual schemas. This paper considers the problem of schema mediation in a PDMS.
Our first contribution is a flexible language for mediating between peer schemas that extends known data integration formalisms to our more complex architecture. We precisely characterize the complexity of query answering for our language. Next, we describe a reformulation algorithm for our language that generalizes both global-as-view and local-as-view query answering algorithms. Then we describe several methods for optimizing the reformulation algorithm and an initial set of experiments studying its performance. Finally, we define and consider several {\em global\/} problems in managing semantic mappings in a PDMS.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data integration; peer data management; schema mediation; Web and databases", } @Article{Benatallah:2005:AWS, author = "Boualem Benatallah and Mohand-Said Hacid and Alain Leger and Christophe Rey and Farouk Toumani", title = "On automating {Web} services discovery", journal = j-VLDB-J, volume = "14", number = "1", pages = "84--96", month = mar, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:14 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "One of the challenging problems that Web service technology faces is the ability to effectively discover services based on their capabilities. We present an approach to tackling this problem in the context of description logics (DLs). We formalize service discovery as a new instance of the problem of rewriting concepts using terminologies. We call this new instance the {\em best covering problem}. 
We provide a formalization of the {\em best covering problem\/} in the framework of DL-based ontologies and propose a hypergraph-based algorithm to effectively compute best covers of a given request. We propose a novel matchmaking algorithm that takes as input a service request (or query) $Q$ and an ontology $ \mathcal {T}$ of services and finds a set of services called a ``best cover'' of $Q$ whose descriptions contain as much {\em common information\/} with $Q$ as possible and as little {\em extra information\/} with respect to $Q$ as possible. We have implemented the proposed discovery technique and used the developed prototype in the context of the {\em Multilingual Knowledge Based European Electronic Marketplace\/} (MKBEEM) project.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "description logics; discovery; hypergraphs; semantic matchmaking; Web services", } @Article{Sattler:2005:CBQ, author = "Kai-Uwe Sattler and Ingolf Geist and Eike Schallehn", title = "Concept-based querying in mediator systems", journal = j-VLDB-J, volume = "14", number = "1", pages = "97--111", month = mar, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:14 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "One approach to overcoming heterogeneity as a part of data integration in mediator systems is the use of metadata in the form of a vocabulary or ontology to represent domain knowledge explicitly. This requires including this meta level during query formulation and processing. In this paper, we address this problem in the context of a mediator that uses a concept-based integration model and an extension of the XQuery language called CQuery. 
This mediator has been developed as part of a project for integrating data about cultural assets. We describe the language extensions and their semantics as well as the rewriting and evaluation steps. Furthermore, we discuss aspects of caching and keyword-based search in support of an efficient query formulation and processing.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data integration; mediator systems; query processing", } @Article{Tzitzikas:2005:MTB, author = "Yannis Tzitzikas and Nicolas Spyratos and Panos Constantopoulos", title = "Mediators over taxonomy-based information sources", journal = j-VLDB-J, volume = "14", number = "1", pages = "112--136", month = mar, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:14 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We propose a mediator model for providing integrated and unified access to multiple taxonomy-based sources. Each source comprises a taxonomy and a database that indexes objects under the terms of the taxonomy. A mediator comprises a taxonomy and a set of relations between the mediator's and the sources' terms, called articulations. By combining different modes of query evaluation at the sources and the mediator and different types of query translation, a flexible, efficient scheme of mediator operation is obtained that can accommodate various application needs and levels of answer quality. We adopt a simple conceptual modeling approach (taxonomies and intertaxonomy mappings) and we illustrate its advantages in terms of ease of use, uniformity, scalability, and efficiency. 
These characteristics make this proposal appropriate for a large-scale network of sources and mediators.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "approximate query translation; information integration; mediators; taxonomies", } @Article{Gunopulos:2005:SEM, author = "Dimitrios Gunopulos and George Kollios and Vassilis J. Tsotras and Carlotta Domeniconi", title = "Selectivity estimators for multidimensional range queries over real attributes", journal = j-VLDB-J, volume = "14", number = "2", pages = "137--154", month = apr, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:15 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Estimating the selectivity of multidimensional range queries over real valued attributes has significant applications in data exploration and database query optimization. In this paper, we consider the following problem: given a table of $d$ attributes whose domain is the real numbers and a query that specifies a range in each dimension, find a good approximation of the number of records in the table that satisfy the query. The simplest approach to tackle this problem is to assume that the attributes are independent. More accurate estimators try to capture the joint data distribution of the attributes. In databases, such estimators include the construction of multidimensional histograms, random sampling, or the wavelet transform. In statistics, kernel estimation techniques are being used. Many traditional approaches assume that attribute values come from discrete, finite domains, where different values have high frequencies. 
However, for many novel applications (as in temporal, spatial, and multimedia databases) attribute values come from the infinite domain of real numbers. Consequently, each value appears very infrequently, a characteristic that affects the behavior and effectiveness of the estimator. Moreover, real-life data exhibit attribute correlations that also affect the estimator. We present a new histogram technique that is designed to approximate the density of multidimensional datasets with real attributes. Our technique defines buckets of variable size and allows the buckets to overlap. The size of the cells is based on the local density of the data. The use of overlapping buckets allows a more compact approximation of the data distribution. We also show how to generalize kernel density estimators and how to apply them to the multidimensional query approximation problem. Finally, we compare the accuracy of the proposed techniques with existing techniques using real and synthetic datasets. The experimental results show that the proposed techniques behave more accurately in high dimensionalities than previous approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Alhajj:2005:VFC, author = "Reda Alhajj and Faruk Polat and Cem Y{\'\i}lmaz", title = "Views as first-class citizens in object-oriented databases", journal = j-VLDB-J, volume = "14", number = "2", pages = "155--169", month = apr, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:15 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Extensibility and dynamic schema evolution are among the attractive features that lead to the wide acceptance of the object-oriented paradigm. 
Not knowing all class hierarchy details should not prevent a user from introducing new classes when necessary. Naive or professional users may define new classes either by using class definition constructs or as views. However, improper placement of such classes leads to a flat hierarchy with many things duplicated. To overcome this problem, we automated the process in order to help the user find the most appropriate position with respect to her class in the hierarchy regardless of her knowledge of the hierarchy. The system must be responsible for the proper placement of new classes because only the system has complete knowledge of the details of the class hierarchy, especially in a dynamic environment where changes are very frequent. In other published work, we proved that to define a view it is enough to have the set of objects that qualify to be in a view in addition to having message expressions (possible paths) that lead to desired values within those objects. Here, we go further to map a view that is intended to be persistent into a class. Then we investigate the proper position of that class in the hierarchy. To achieve this, we consider current characteristics of a new class in order to derive its relationship with other existing classes in the hierarchy. Another advantage of the presented model is that views that generate new objects are still updatable simply because we based the creation of new objects on existing identities. In other words, an object participates inside view objects by its identity regardless of which particular values from that object are of interest to the view. Values are reachable via message expressions, not violating encapsulation. 
This way, actual values are present in only one place and can be updated.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "class hierarchy; object-oriented databases; reusability; schema evolution; views", } @Article{Zhang:2005:OSM, author = "Donghui Zhang and Vassilis J. Tsotras", title = "Optimizing spatial {Min\slash Max} aggregations", journal = j-VLDB-J, volume = "14", number = "2", pages = "170--181", month = apr, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:15 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Aggregate computation over a collection of spatial objects appears in many real-life applications. Aggregates are computed on values (weights) associated with spatial objects, for example, the temperature or rainfall over the area covered by the object. In this paper we concentrate on MIN/MAX aggregations: ``given a query rectangle, find the minimum/maximum weight among all objects intersecting the query rectangle.'' Traditionally such queries have been performed as range searches. Assuming that objects are indexed by a spatial access method (SAM), the MIN/MAX is computed while retrieving those objects intersecting the query interval. This requires effort proportional to the number of objects satisfying the query, which may be large. A better approach is to maintain aggregate information among the index nodes of the spatial access method; then various index paths can be eliminated during the range search. Yet another approach is to build a specialized index that maintains the aggregate incrementally. We propose four novel optimizations for improving the performance of MIN/MAX queries when an index structure (traditional or specialized) is present. 
Moreover, we introduce the MR-tree, an R-tree-like dynamic specialized index that incorporates all four optimizations. Our experiments show that the MR-tree offers drastic performance improvement over previous solutions. As a byproduct of this work we present an optimized version of the MSB-tree, an index that has been proposed for the MIN/MAX computation over 1D interval objects.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "indexing; MIN/MAX; spatial aggregates", } @Article{Perich:2005:CJP, author = "Filip Perich and Anupam Joshi and Yelena Yesha and Tim Finin", title = "Collaborative joins in a pervasive computing environment", journal = j-VLDB-J, volume = "14", number = "2", pages = "182--196", month = apr, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:15 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We present a collaborative query processing protocol based on the principles of Contract Nets. The protocol is designed for pervasive computing environments where, in addition to operating on limited computing and battery resources, mobile devices cannot always rely on being able to access the wired infrastructure. Devices, therefore, need to collaborate with each other in order to obtain data otherwise inaccessible due to the nature of the environment. Furthermore, by intelligently using answers cached by peers, devices can reduce their computation cost. 
We show the effectiveness of our approach by evaluating performance of devices querying for data while moving in a citylike environment.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "distributed join processing; mobile ad hoc networks; peer-to-peer computing; pervasive computing environments; query processing", } @Article{Josifovski:2005:QXS, author = "Vanja Josifovski and Marcus Fontoura and Attila Barta", title = "Querying {XML} streams", journal = j-VLDB-J, volume = "14", number = "2", pages = "197--210", month = apr, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:15 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Efficient querying of XML streams will be one of the fundamental features of next-generation information systems. In this paper we propose the TurboXPath path processor, which accepts a language equivalent to a subset of the for-let-where constructs of XQuery over a single document. TurboXPath can be extended to provide full XQuery support or used to augment federated database engines for efficient handling of queries over XML data streams produced by external sources. Internally, TurboXPath uses a tree-shaped path expression with multiple outputs to drive the execution. The result of a query execution is a sequence of tuples of XML fragments matching the output nodes. Based on a streamed execution model, TurboXPath scales up to large documents and has limited memory consumption for increased concurrency. 
Experimental evaluation of a prototype demonstrates performance gains compared to other state-of-the-art path processors.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Aggarwal:2005:EEA, author = "Charu C. Aggarwal and Philip S. Yu", title = "An effective and efficient algorithm for high-dimensional outlier detection", journal = j-VLDB-J, volume = "14", number = "2", pages = "211--221", month = apr, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:15 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The outlier detection problem has important applications in the field of fraud detection, network robustness analysis, and intrusion detection. Most such applications are most important for high-dimensional domains in which the data can contain hundreds of dimensions. Many recent algorithms have been proposed for outlier detection that use several concepts of proximity in order to find the outliers based on their relationship to the other points in the data. However, in high-dimensional space, the data are sparse and concepts using the notion of proximity fail to retain their effectiveness. In fact, the sparsity of high-dimensional data can be understood in a different way so as to imply that every point is an equally good outlier from the perspective of distance-based definitions. Consequently, for high-dimensional data, the notion of finding meaningful outliers becomes substantially more complex and nonobvious. 
In this paper, we discuss new techniques for outlier detection that find the outliers by studying the behavior of projections from the data set.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data mining; high-dimensional spaces; outlier detection", } @Article{Yao:2005:HBL, author = "Shu-Yuen D. Yao and Cyrus Shahabi and Per-{\AA}ke Larson", title = "Hash-based labeling techniques for storage scaling", journal = j-VLDB-J, volume = "14", number = "2", pages = "222--237", month = apr, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:15 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Scalable storage architectures allow for the addition or removal of storage devices to increase storage capacity and bandwidth or retire older devices. Assuming random placement of data objects across multiple storage devices of a storage pool, our optimization objective is to redistribute a minimum number of objects after scaling the pool. In addition, a uniform distribution, and hence a balanced load, should be ensured after redistribution. Moreover, the redistributed objects should be retrieved efficiently during the normal mode of operation: in one I/O access and with low complexity computation. To achieve this, we propose an algorithm called random disk labeling (RDL), based on double hashing, where storage can be added or removed without any increase in complexity. 
We compare RDL with other proposed techniques and demonstrate its effectiveness through experimentation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "load balancing; random data placement; scalable storage systems", } @Article{Kollios:2005:IMO, author = "George Kollios and Dimitris Papadopoulos and Dimitrios Gunopulos and Vassilis J. Tsotras", title = "Indexing mobile objects using dual transformations", journal = j-VLDB-J, volume = "14", number = "2", pages = "238--256", month = apr, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:15 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the recent advances in wireless networks, embedded systems, and GPS technology, databases that manage the location of moving objects have received increased interest. In this paper, we present indexing techniques for moving object databases. In particular, we propose methods to index moving objects in order to efficiently answer range queries about their current and future positions. This problem appears in real-life applications such as predicting future congestion areas in a highway system or allocating more bandwidth for areas where a high concentration of mobile phones is imminent. We address the problem in external memory and present dynamic solutions, both for the one-dimensional and the two-dimensional cases. Our approach transforms the problem into a dual space that is easier to index. Important in this dynamic environment is not only query performance but also the update processing, given the large number of moving objects that issue updates. 
We compare the dual-transformation approach with the TPR-tree, an efficient method for indexing moving objects that is based on time-parameterized index nodes. An experimental evaluation shows that the dual-transformation approach provides comparable query performance but has much faster update processing. Moreover, the dual method does not require establishing a predefined query horizon.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access methods; mobile objects; spatiotemporal databases", } @Article{Jaluta:2005:CCR, author = "Ibrahim Jaluta and Seppo Sippu and Eljas Soisalon-Soininen", title = "Concurrency control and recovery for balanced {B}-link trees", journal = j-VLDB-J, volume = "14", number = "2", pages = "257--277", month = apr, year = "2005", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:15 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper we present new concurrent and recoverable B-link-tree algorithms. Unlike previous algorithms, ours maintain the balance of the B-link tree at all times, so that a logarithmic time bound for a search or an update operation is guaranteed under arbitrary sequences of record insertions and deletions. A database transaction can contain any number of operations of the form ``fetch the first (or next) matching record'', ``insert a record'', or ``delete a record'', where database records are identified by their primary keys. Repeatable-read-level isolation for transactions is guaranteed by key-range locking. The algorithms apply the write-ahead logging (WAL) protocol and the steal and no-force buffering policies for index and data pages. 
Record inserts and deletes on leaf pages of a B-link tree are logged using physiological redo-undo log records. Each structure modification such as a page split or merge is made an atomic action by keeping the pages involved in the modification latched for the (short) duration of the modification and the logging of that modification; at most two B-link-tree pages are kept $X$-latched at a time. Each structure modification brings the B-link tree into a structurally consistent and balanced state whenever the tree was structurally consistent and balanced initially. Each structure modification is logged using a single physiological redo-only log record. Thus, a structure modification will never be undone even if the transaction that gave rise to it eventually aborts. In restart recovery, the redo pass of our ARIES-based recovery protocol will always produce a structurally consistent and balanced B-link tree, on which the database updates by backward-rolling transactions can always be undone logically, when a physical (page-oriented) undo is no longer possible.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "concurrency control; recovery; transaction; tree-structure modifications", } @Article{Gaasterland:2005:SID, author = "Terry Gaasterland and H. V. 
Jagadish and Louiqa Raschid", title = "Special issue on data management, analysis, and mining for the life sciences", journal = j-VLDB-J, volume = "14", number = "3", pages = "279--280", month = sep, year = "2005", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-005-0165-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:16 MDT 2008", bibsource = "http://portal.acm.org/; http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0938-1287&volume=14&issue=3; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0938-1287&volume=14&issue=3&spage=279", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } %%% TO DO: [23-Aug-2006] ACM Portal lacks data for v14n4, v15n1, and v15n2 @Article{Tian:2005:PMC, author = "Yuanyuan Tian and Sandeep Tata and Richard A. Hankins and Jignesh M. Patel", title = "Practical methods for constructing suffix trees", journal = j-VLDB-J, volume = "14", number = "3", pages = "281--299", month = sep, year = "2005", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-005-0154-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:16 MDT 2008", bibsource = "http://portal.acm.org/; http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0938-1287&volume=14&issue=3; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0938-1287&volume=14&issue=3&spage=281", abstract = "Sequence datasets are ubiquitous in modern life-science applications, and querying sequences is a common and critical operation in many of these applications. 
The suffix tree is a versatile data structure that can be used to evaluate a wide variety of queries on sequence datasets, including evaluating exact and approximate string matches, and finding repeat patterns. However, methods for constructing suffix trees are often very time-consuming, especially for suffix trees that are large and do not fit in the available main memory. Even when the suffix tree fits in memory, it turns out that the processor cache behavior of theoretically optimal suffix tree construction methods is poor, resulting in poor performance. Currently, there are a large number of algorithms for constructing suffix trees, but the practical tradeoffs in using these algorithms for different scenarios are not well characterized. In this paper, we explore suffix tree construction algorithms over a wide spectrum of data sources and sizes. First, we show that on modern processors, a cache-efficient algorithm with $ O(n^2) $ worst-case complexity outperforms popular linear time algorithms like Ukkonen and McCreight, even for in-memory construction. For larger datasets, the disk I/O requirement quickly becomes the bottleneck in each algorithm's performance. To address this problem, we describe two approaches. First, we present a buffer management strategy for the $ O(n^2) $ algorithm. The resulting new algorithm, which we call ``Top Down Disk-based'' (TDD), scales to sizes much larger than have been previously described in literature. This approach far outperforms the best known disk-based construction methods. 
Second, we present a new disk-based suffix tree construction algorithm that is based on a sort-merge paradigm, and show that for constructing very large suffix trees with very little resources, this algorithm is more efficient than TDD.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "sequence matching; suffix tree construction", } @Article{Claypool:2005:SYD, author = "Kajal T. Claypool and Elke A. Rundensteiner", title = "Sync your data: update propagation for heterogeneous protein databases", journal = j-VLDB-J, volume = "14", number = "3", pages = "300--317", month = sep, year = "2005", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-005-0155-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:16 MDT 2008", bibsource = "http://portal.acm.org/; http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0938-1287&volume=14&issue=3; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0938-1287&volume=14&issue=3&spage=300", abstract = "The traditional model of bench (wet) chemistry in many life sciences domain is today actively complemented by computer-based discoveries utilizing the growing number of online data sources. A typical {\em computer-based discovery\/} scenario for many life scientists includes the creation of local caches of pertinent information from multiple online resources such as Swissprot [Nucleic Acid Res. 1 (28), 45--48 (2000)], PIR [Nucleic Acids Res. 28 (1), 41--44 (2000)], PDB [The Protein DataBank. Wiley, New York (2003)], to enable efficient data analysis. 
This local caching of data, however, exposes their research and eventual results to the problems of data staleness, that is, cached data may quickly be obsolete or incorrect, dependent on the updates that are made to the source data. This represents a significant challenge to the scientific community, forcing scientists to be continuously aware of the frequent changes made to public data sources, and more importantly aware of the potential effects on their own derived data sets during the course of their research. To address this significant challenge, in this paper we present an approach for handling update propagation between heterogeneous databases, guaranteeing data freshness for scientists irrespective of their choice of data source and its underlying data model or interface. We propose a {\em middle-layer\/} --based solution wherein first the change in the online data source is translated to a sequence of changes in the middle-layer; next each change in the middle-layer is propagated through an algebraic representation of the translation between the source and the target; and finally the net-change is translated to a set of changes that are then applied to the local cache. In this paper, we present our algebraic model that represents the mapping of the online resource to the local cache, as well as our adaptive propagation algorithm that can incrementally propagate both schema and data changes from the source to the cache in a data model independent manner. We present a case study based on a joint ongoing project with our collaborators in the Chemistry Department at UMass-Lowell to explicate our approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data transformation; data translation; schema evolution; update propagation; view maintenance", } @Article{Conery:2005:RBW, author = "John S. Conery and Julian M. 
Catchen and Michael Lynch", title = "Rule-based workflow management for bioinformatics", journal = j-VLDB-J, volume = "14", number = "3", pages = "318--329", month = sep, year = "2005", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-005-0153-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:16 MDT 2008", bibsource = "http://portal.acm.org/; http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0938-1287&volume=14&issue=3; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0938-1287&volume=14&issue=3&spage=318", abstract = "We describe a data-centric software architecture for bioinformatics workflows and a rule-based workflow enactment system that uses declarative specifications of data dependences between steps to automatically order the execution of those steps. A data-centric view allows researchers to develop abstract descriptions of workflow products and provides mechanisms for describing workflow steps as objects. The rule-based approach supports an iterative design methodology for creating new workflows, where steps can be developed in small, incremental updates, and the object orientation allows workflow steps developed for one project to be reused in other projects.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "bioinformatics; rule-based system; workflow", } @Article{Thakkar:2005:COE, author = "Snehal Thakkar and Jos{\'e} Luis Ambite and Craig A. 
Knoblock", title = "Composing, optimizing, and executing plans for bioinformatics web services", journal = j-VLDB-J, volume = "14", number = "3", pages = "330--353", month = sep, year = "2005", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-005-0158-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:16 MDT 2008", bibsource = "http://portal.acm.org/; http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0938-1287&volume=14&issue=3; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0938-1287&volume=14&issue=3&spage=330", abstract = "The emergence of a large number of bioinformatics datasets on the Internet has resulted in the need for flexible and efficient approaches to integrate information from multiple bioinformatics data sources and services. In this paper, we present our approach to automatically generate composition plans for web services, optimize the composition plans, and execute these plans efficiently. While data integration techniques have been applied to the bioinformatics domain, the focus has been on answering specific user queries. In contrast, we focus on automatically generating {\em parameterized\/} integration plans that can be hosted as web services that respond to a range of inputs. In addition, we present two novel techniques that improve the execution time of the generated plans by reducing the number of requests to the existing data sources and by executing the generated plan more efficiently. The first optimization technique, called tuple-level filtering, analyzes the source/service descriptions in order to automatically insert filtering conditions in the composition plans that result in fewer requests to the component web services. To ensure that the filtering conditions can be evaluated, this technique may include sensing operations in the integration plan. 
The savings due to filtering significantly exceed the cost of the sensing operations. The second optimization technique consists in mapping the integration plans into programs that can be executed by a dataflow-style, streaming execution engine. We use real-world bioinformatics web services to show experimentally that (1) our automatic composition techniques can efficiently generate parameterized plans that integrate data from large numbers of existing services and (2) our optimization techniques can significantly reduce the response time of the generated integration plans.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "bioinformatics; data integration; dataflow-style streaming execution; query optimization; Web service composition", } @Article{Vlachos:2006:IMT, author = "Michail Vlachos and Marios Hadjieleftheriou and Dimitrios Gunopulos and Eamonn Keogh", title = "Indexing {Multidimensional Time-Series}", journal = j-VLDB-J, volume = "15", number = "1", pages = "1--20", month = jan, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:17 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "While most time series data mining research has concentrated on providing solutions for a single distance function, in this work we motivate the need for an index structure that can support multiple distance measures. Our specific area of interest is the efficient retrieval and analysis of similar trajectories. Trajectory datasets are very common in environmental applications, mobility experiments, and video surveillance and are especially important for the discovery of certain biological patterns. 
Our primary similarity measure is based on the longest common subsequence (LCSS) model that offers enhanced robustness, particularly for noisy data, which are encountered very often in real-world applications. However, our index is able to accommodate other distance measures as well, including the ubiquitous Euclidean distance and the increasingly popular dynamic time warping (DTW). While other researchers have advocated one or other of these similarity measures, a major contribution of our work is the ability to support all these measures without the need to restructure the index. Our framework guarantees no false dismissals and can also be tailored to provide much faster response time at the expense of slightly reduced precision/recall. The experimental results demonstrate that our index can help speed up the computation of expensive similarity measures such as the LCSS and the DTW.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "dynamic time warping; ensemble index; longest common subsequence; motion capture; trajectories", } @Article{Zheng:2006:GPI, author = "Baihua Zheng and Jianliang Xu and Wang-Chien Lee and Lun Lee", title = "Grid-partition index: a hybrid method for nearest-neighbor queries in wireless location-based services", journal = j-VLDB-J, volume = "15", number = "1", pages = "21--39", month = jan, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:17 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Traditional nearest-neighbor (NN) search is based on two basic indexing approaches: object-based indexing and solution-based indexing. The former is constructed based on the locations of data objects: using some distance heuristics on object locations. 
The latter is built on a precomputed solution space. Thus, NN queries can be reduced to and processed as simple point queries in this solution space. Both approaches exhibit some disadvantages, especially when employed for wireless data broadcast in mobile computing environments. In this paper, we introduce a new index method, called the {\em grid-partition index}, to support NN search in both on-demand access and periodic broadcast modes of mobile computing. The grid-partition index is constructed based on the Voronoi diagram, i.e., the solution space of NN queries. However, it has two distinctive characteristics. First, it divides the solution space into grid cells such that a query point can be efficiently mapped into a grid cell around which the nearest object is located. This significantly reduces the search space. Second, the grid-partition index stores the {\em objects\/} that are potential NNs of any query falling within the cell. The storage of objects, instead of the Voronoi cells, makes the grid-partition index a hybrid of the solution-based and object-based approaches. As a result, it achieves a much more compact representation than the pure solution-based approach and avoids backtracked traversals required in the typical object-based approach, thus realizing the advantages of both approaches. We develop an incremental construction algorithm to address the issue of object update. In addition, we present a cost model to approximate the search cost of different grid partitioning schemes. The performances of the grid-partition index and existing indexes are evaluated using both synthetic and real data. The results show that, overall, the grid-partition index significantly outperforms object-based indexes and solution-based indexes. Furthermore, we extend the grid-partition index to support continuous-nearest-neighbor search. 
Both algorithms and experimental results are presented.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "continuous-nearest-neighbor search; index structure; location-dependent data; nearest-neighbor search; wireless broadcast", } @Article{Tamir:2006:CGM, author = "Raz Tamir and Yehuda Singer", title = "On a confidence gain measure for association rule discovery and scoring", journal = j-VLDB-J, volume = "15", number = "1", pages = "40--52", month = jan, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:17 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This article presents a new interestingness measure for association rules called confidence gain (CG). Focus is given to extraction of human associations rather than associations between market products. There are two main differences between the two (human and market associations). The first difference is the strong asymmetry of human associations (e.g., the association ``shampoo''--``hair'' is much stronger than ``hair''--``shampoo''), where in market products asymmetry is less intuitive and less evident. The second is the background knowledge humans employ when presented with a stimulus (input phrase). CG calculates the local confidence of a given term compared to its average confidence throughout a given database. CG is found to outperform several association measures since it captures both the asymmetric notion of an association (as in the confidence measure) while adding the comparison to an expected confidence (as in the lift measure). The use of average confidence introduces the ``background knowledge'' notion into the CG measure.
Various experiments have shown that CG and local confidence gain (a low-complexity version of CG) successfully generate association rules when compared to human free associations. The experiments include a large-scale ``free association Turing test'' where human free associations were compared to associations generated by the CG and other association measures. Rules discovered by CG were found to be significantly better than those discovered by other measures. CG can be used for many purposes, such as personalization, sense disambiguation, query expansion, and improving classification performance of small item sets within large databases. Although CG was found to be useful for Internet data retrieval, results can be easily used over any type of database.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "association generation; association rule validation methods; confidence gain; Web data management; Web mining", } @Article{Bremer:2006:IDD, author = "Jan-Marco Bremer and Michael Gertz", title = "Integrating document and data retrieval based on {XML}", journal = j-VLDB-J, volume = "15", number = "1", pages = "53--83", month = jan, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:17 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "For querying structured and semistructured data, data retrieval and document retrieval are two valuable and complementary techniques that have not yet been fully integrated. In this paper, we introduce integrated information retrieval (IIR), an XML-based retrieval approach that closes this gap. We introduce the syntax and semantics of an extension of the XQuery language called XQuery/IR.
The extended language realizes IIR and thereby allows users to formulate new kinds of queries by nesting ranked document retrieval and precise data retrieval queries. Furthermore, we detail index structures and efficient query processing approaches for implementing XQuery/IR. Based on a new identification scheme for nodes in node-labeled tree structures, the extended index structures require only a fraction of the space of comparable index structures that only support data retrieval.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data retrieval; document retrieval; index structures; integrated information retrievals; structural join; XML", } @Article{Ogras:2006:OSD, author = "Y. Ogras and Hakan Ferhatosmanoglu", title = "Online summarization of dynamic time series data", journal = j-VLDB-J, volume = "15", number = "1", pages = "84--98", month = jan, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:17 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Managing large-scale time series databases has attracted significant attention in the database community recently. Related fundamental problems such as dimensionality reduction, transformation, pattern mining, and similarity search have been studied extensively. Although the time series data are dynamic by nature, as in data streams, current solutions to these fundamental problems have been mostly for the static time series databases. In this paper, we first propose a framework to online summary generation for large-scale and dynamic time series data, such as data streams. Then, we propose online transform-based summarization techniques over data streams that can be updated in constant time and space. 
We present both the exact and approximate versions of the proposed techniques and provide error bounds for the approximate case. One of our main contributions in this paper is the extensive performance analysis. Our experiments carefully evaluate the quality of the online summaries for point, range, and $k$-nn queries using real-life dynamic data sets of substantial size.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data streams; dimensionality reduction; time-series data; transformation-based summarization", } @Article{Goh:2006:DBM, author = "Leng Goh and Yanfeng Shu and Zhiyong Huang and Chin Ooi", title = "Dynamic buffer management with extensible replacement policies", journal = j-VLDB-J, volume = "15", number = "2", pages = "99--120", month = jun, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:18 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The objective of extensible DBMSs is to ease the construction of specialized DBMSs for nontraditional applications. Although much work has been done in providing various levels of extensibility (e.g., extensibility of data types and operators, query language extensibility, and query optimizer extensibility), there has been very limited research in providing extensibility at the buffer management level. Supporting extensibility at the buffer management level is important as it can contribute significantly to overall system performance. This paper addresses the problem of supporting extensibility of buffer replacement policies. The main contribution is the proposal of a framework for modeling buffer replacement policies. This work is novel in two aspects.
First, by providing a uniform and generic specification of buffer replacement policies, the proposed framework unifies existing work in this area. Second, our work introduces a new level of extensibility. None of the existing extensible DBMSs, to our knowledge, provides extensibility at the buffer management level. The proposed framework provides a basis for the construction of an extensible buffer manager as part of a 100\% Java-based storage manager. We conducted an extensive performance study to investigate the performance of the proposed framework. The experimental results demonstrate that the proposed framework is indeed feasible for existing DBMSs and improves system performance significantly without costing significant overhead.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "buffer management; extensible DBMS; replacement strategies", } @Article{Arasu:2006:CCQ, author = "Arvind Arasu and Shivnath Babu and Jennifer Widom", title = "The {CQL} continuous query language: semantic foundations and query execution", journal = j-VLDB-J, volume = "15", number = "2", pages = "121--142", month = jun, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:18 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "{\em CQL}, a {\em continuous query language}, is supported by the STREAM prototype data stream management system (DSMS) at Stanford. CQL is an expressive SQL-based declarative language for registering continuous queries against streams and stored relations. We begin by presenting an abstract semantics that relies only on ``black-box'' mappings among streams and relations. From these mappings we define a precise and general interpretation for continuous queries. 
CQL is an instantiation of our abstract semantics using SQL to map from relations to relations, window specifications derived from SQL-99 to map from streams to relations, and three new operators to map from relations to streams. Most of the CQL language is operational in the STREAM system. We present the structure of CQL's query execution plans as well as details of the most important components: operators, interoperator queues, synopses, and sharing of components among multiple operators and queries. Examples throughout the paper are drawn from the {\em Linear Road\/} benchmark recently proposed for DSMSs. We also curate a public repository of data stream applications that includes a wide variety of queries expressed in CQL. The relative ease of capturing these applications in CQL is one indicator that the language contains an appropriate set of constructs for data stream processing.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "continuous queries; data streams; query language; query processing", } @Article{Hadjieleftheriou:2006:ISA, author = "Marios Hadjieleftheriou and George Kollios and J. Tsotras and Dimitrios Gunopulos", title = "Indexing spatiotemporal archives", journal = j-VLDB-J, volume = "15", number = "2", pages = "143--164", month = jun, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:18 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Spatiotemporal objects --- that is, objects that evolve over time --- appear in many applications. 
Due to the nature of such applications, storing the evolution of objects through time in order to answer historical queries (queries that refer to past states of the evolution) requires a very large specialized database, what is termed in this article a {\em spatiotemporal archive}. Efficient processing of historical queries on spatiotemporal archives requires equally sophisticated indexing schemes. Typical spatiotemporal indexing techniques represent the objects using minimum bounding regions (MBR) extended with a temporal dimension, which are then indexed using traditional multidimensional index structures. However, rough MBR approximations introduce excessive overlap between index nodes, which deteriorates query performance. This article introduces a robust indexing scheme for answering spatiotemporal queries more efficiently. A number of algorithms and heuristics are elaborated that can be used to preprocess a spatiotemporal archive in order to produce {\em finer object approximations}, which, in combination with {\em a multiversion index structure}, will greatly improve query performance in comparison to the straightforward approaches. The proposed techniques introduce a query efficiency vs. space tradeoff that can help tune a structure according to available resources. Empirical observations for estimating the necessary amount of additional storage space required for improving query performance by a given factor are also provided. Moreover, heuristics for applying the proposed ideas in an online setting are discussed. 
Finally, a thorough experimental evaluation is conducted to show the merits of the proposed techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "indexing; moving objects; spatiotemporal databases; trajectories", } @Article{Guting:2006:MQM, author = "Hartmut G{\"u}ting and Teixeira de Almeida and Zhiming Ding", title = "Modeling and querying moving objects in networks", journal = j-VLDB-J, volume = "15", number = "2", pages = "165--190", month = jun, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:18 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Moving objects databases have become an important research issue in recent years. For modeling and querying moving objects, there exists a comprehensive framework of abstract data types to describe objects moving freely in the 2D plane, providing data types such as {\em moving point\/} or {\em moving region}. However, in many applications people or vehicles move along transportation networks. It makes a lot of sense to model the network explicitly and to describe movements relative to the network rather than unconstrained space, because then it is much easier to formulate in queries relationships between moving objects and the network. Moreover, such models can be better supported in indexing and query processing. In this paper, we extend the ADT approach by modeling networks explicitly and providing data types for static and moving network positions and regions. In a highway network, example entities corresponding to these data types are motels, construction areas, cars, and traffic jams. 
The network model is not too simplistic; it allows one to distinguish simple roads and divided highways and to describe the possible traversals of junctions precisely. The new types and operations are integrated seamlessly into the ADT framework to achieve a relatively simple, consistent and powerful overall model and query language for constrained and unconstrained movement.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "ADT; data type; moving object; network; spatio-temporal", } @Article{Chirkova:1999:AQU, author = "Rada Chirkova and Chen Li and Jia Li", title = "Answering queries using materialized views with minimum size", journal = j-VLDB-J, volume = "15", number = "3", pages = "191--210", month = apr, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:19 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we study the following problem. Given a database and a set of queries, we want to find a set of views that can compute the answers to the queries, such that the amount of space, in bytes, required to store the viewset is minimum on the given database. (We also handle problem instances where the input has a {\em set\/} of database instances, as described by an oracle that returns the sizes of view relations for given view definitions.) This problem is important for applications such as distributed databases, data warehousing, and data integration. We explore the decidability and complexity of the problem for workloads of conjunctive queries. We show that results differ significantly depending on whether the workload queries have self-joins.
Further, for queries without self-joins we describe a very compact search space of views, which contains all views in at least one optimal viewset. We present techniques for finding a minimum-size viewset for a single query without self-joins by using the shape of the query and its constraints, and validate the approach by extensive experiments.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data warehouses; distributed systems; minimum-size viewsets; views", remark = "Check month: April or May??", } @Article{Cao:1999:STD, author = "Hu Cao and Ouri Wolfson and Goce Trajcevski", title = "Spatio-temporal data reduction with deterministic error bounds", journal = j-VLDB-J, volume = "15", number = "3", pages = "211--228", month = apr, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:19 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A common way of storing spatio-temporal information about mobile devices is in the form of a 3D (2D geography + time) trajectory. We argue that when cellular phones and Personal Digital Assistants become location-aware, the size of the spatio-temporal information generated may prohibit efficient processing. We propose to adopt a technique studied in computer graphics, namely line-simplification, as an approximation technique to solve this problem. Line simplification will reduce the size of the trajectories. Line simplification uses a distance function in producing the trajectory approximation. We postulate the desiderata for such a distance-function: it should be sound, namely the error of the answers to spatio-temporal queries must be bounded.
We analyze several distance functions, and prove that some are sound in this sense for some types of queries, while others are not. A distance function that is sound for all common spatio-temporal query types is introduced and analyzed. Then we propose an aging mechanism which gradually shrinks the size of the trajectories as time progresses. We also propose to adopt existing linguistic constructs to manage the uncertainty introduced by the trajectory approximation. Finally, we analyze experimentally the effectiveness of line-simplification in reducing the size of a trajectories database.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data reduction; line simplification; moving objects database; uncertainty", remark = "Check month: April or May??", } @Article{Benetis:1999:NRN, author = "Rimantas Benetis and S. Jensen and Gytis Kar{\v{c}}iauskas and Simonas {\v{S}}altenis", title = "Nearest and reverse nearest neighbor queries for moving objects", journal = j-VLDB-J, volume = "15", number = "3", pages = "229--249", month = apr, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:19 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the continued proliferation of wireless communications and advances in positioning technologies, algorithms for efficiently answering queries about large populations of moving objects are gaining interest. This paper proposes algorithms for $k$ nearest and reverse $k$ nearest neighbor queries on the current and anticipated future positions of points moving continuously in the plane.
The former type of query returns $k$ objects nearest to a query object for each time point during a time interval, while the latter returns the objects that have a specified query object as one of their $k$ closest neighbors, again for each time point during a time interval. In addition, algorithms for so-called persistent and continuous variants of these queries are provided. The algorithms are based on the indexing of object positions represented as linear functions of time. The results of empirical performance experiments are reported.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "continuous queries; incremental update; location-based services; mobile objects; neighbor queries; persistent queries", remark = "Check month: April or May??", } @Article{Pelleg:1999:DTS, author = "Dan Pelleg and Andrew Moore", title = "Dependency trees in sub-linear time and bounded memory", journal = j-VLDB-J, volume = "15", number = "3", pages = "250--262", month = apr, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:19 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We focus on the problem of efficient learning of dependency trees. Once grown, they can be used as a special case of a Bayesian network, for PDF approximation, and for many other uses. Given the data, a well-known algorithm can fit an optimal tree in time that is quadratic in the number of attributes and linear in the number of records. We show how to modify it to exploit partial knowledge about edge weights.
Experimental results show running time that is near-constant in the number of records, without significant loss in accuracy of the generated trees.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data mining; dependency trees; fast algorithms; probably approximately correct learning", remark = "Check month: April or May??", } @Article{Che:1999:QOX, author = "Dunren Che and Karl Aberer and Tamer {\"O}zsu", title = "Query optimization in {XML} structured-document databases", journal = j-VLDB-J, volume = "15", number = "3", pages = "263--289", month = apr, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:19 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "While the information published in the form of XML-compliant documents keeps fast mounting up, efficient and effective query processing and optimization for XML have now become more important than ever. This article reports our recent advances in XML structured-document query optimization. In this article, we elaborate on a novel approach and the techniques developed for XML query optimization. Our approach performs heuristic-based algebraic transformations on XPath queries, represented as PAT algebraic expressions, to achieve query optimization. This article first presents a comprehensive set of general equivalences with regard to XML documents and XML queries. Based on these equivalences, we developed a large set of deterministic algebraic transformation rules for XML query optimization. Our approach is unique, in that it performs exclusively deterministic transformations on queries for fast optimization.
The deterministic nature of the proposed approach straightforwardly renders high optimization efficiency and simplicity in implementation. Our approach is a logical-level one, which is independent of any particular storage model. Therefore, the optimizers developed based on our approach can be easily adapted to a broad range of XML data/information servers to achieve fast query optimization. Experimental study confirms the validity and effectiveness of the proposed approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "deterministic query optimization; query transformation; XML database; XML query optimization; XML query processing", remark = "Check month: April or May??", } @Article{Ferrari:2006:GES, author = "Elena Ferrari and Bhavani Thuraisingham", title = "Guest editorial: special issue on privacy preserving data management", journal = j-VLDB-J, volume = "15", number = "4", pages = "291--292", month = nov, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", remark = "Check month: April or November??", } @Article{Mukherjee:2006:PPT, author = "Shibnath Mukherjee and Zhiyuan Chen and Aryya Gangopadhyay", title = "A privacy-preserving technique for {Euclidean} distance-based mining algorithms using {Fourier}-related transforms", journal = j-VLDB-J, volume = "15", number = "4", pages = "293--315", month = nov, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:20 MDT 
2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Privacy preserving data mining has become increasingly popular because it allows sharing of privacy-sensitive data for analysis purposes. However, existing techniques such as random perturbation do not fare well for simple yet widely used and efficient Euclidean distance-based mining algorithms. Although original data distributions can be pretty accurately reconstructed from the perturbed data, distances between individual data points are not preserved, leading to poor accuracy for the distance-based mining methods. Besides, they do not generally focus on data reduction. Other studies on secure multi-party computation often concentrate on techniques useful to very specific mining algorithms and scenarios such that they require modification of the mining algorithms and are often difficult to generalize to other mining algorithms or scenarios. This paper proposes a novel generalized approach using the well-known energy compaction power of Fourier-related transforms to hide sensitive data values and to approximately preserve Euclidean distances in centralized and distributed scenarios to a great degree of accuracy. Three algorithms to select the most important transform coefficients are presented, one for a centralized database case, the second one for a horizontally partitioned, and the third one for a vertically partitioned database case. 
Experimental results demonstrate the effectiveness of the proposed approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data mining; Fourier transform; privacy", remark = "Check month: September or November??", } @Article{Jiang:2006:SDF, author = "Wei Jiang and Chris Clifton", title = "A secure distributed framework for achieving $k$-anonymity", journal = j-VLDB-J, volume = "15", number = "4", pages = "316--333", month = nov, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "$k$-anonymity provides a measure of privacy protection by preventing re-identification of data to fewer than a group of $k$ data items. While algorithms exist for producing $k$-anonymous data, the model has been that of a single source wanting to publish data. Due to privacy issues, it is common that data from different sites cannot be shared directly. Therefore, this paper presents a two-party framework along with an application that generates $k$-anonymous data from two vertically partitioned sources without disclosing data from one site to the other. 
The framework is privacy preserving in the sense that it satisfies the secure definition commonly defined in the literature of Secure Multiparty Computation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "anonymity; privacy; security", remark = "Check month: April or November??", } @Article{Blanton:2006:SRF, author = "Marina Blanton and Mikhail Atallah", title = "Succinct representation of flexible and privacy-preserving access rights", journal = j-VLDB-J, volume = "15", number = "4", pages = "334--354", month = nov, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We explore the problem of portable and flexible privacy preserving access rights that permit access to a large collection of digital goods. {\em Privacy-preserving\/} access control means that the service provider can neither learn what access rights a customer has nor link a request to access an item to a particular customer, thus maintaining privacy of both customer activity and customer access rights. {\em Flexible\/} access rights allow a customer to choose a subset of items or groups of items from the repository, obtain access to and be charged only for the items selected. And {\em portability\/} of access rights means that the rights themselves can be stored on small devices of limited storage space and computational capabilities such as smartcards or sensors, and therefore the rights must be enforced using the limited resources available. In this paper, we present and compare two schemes that address the problem of such access rights. 
We show that much can be achieved if one allows for even a negligible amount of false positives --- items that were not requested by the customer, but inadvertently were included in the customer access right representation due to constrained space resources. But minimizing false positives is one of many other desiderata that include protection against sharing of false positives information by unscrupulous users, providing the users with transaction untraceability and unlinkability, and forward compatibility of the scheme. Our first scheme does not place any constraints on the amount of space available on the limited-capacity storage device, and searches for the best representation that meets the requirements. The second scheme, on the other hand, has (modest) requirements on the storage space available, but guarantees a low rate of false positives: with $ O(m c) $ storage space available on the smartcard (where $m$ is the number of items or groups of items included in the subscription and $c$ is a selectable parameter), it achieves a rate of false positives of $ m^{-c}$.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "compact representation; flexible access rights; privacy-preserving access rights", remark = "Check month: April or November??", } @Article{Domingo-Ferrer:2006:EMD, author = "Josep Domingo-Ferrer and Antoni Mart{\'\i}nez-Ballest{\'e} and Josep Maria Mateo-Sanz and Francesc Seb{\'e}", title = "Efficient multivariate data-oriented microaggregation", journal = j-VLDB-J, volume = "15", number = "4", pages = "355--369", month = nov, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Microaggregation is a family of 
methods for statistical disclosure control (SDC) of microdata (records on individuals and/or companies), that is, for masking microdata so that they can be released while preserving the privacy of the underlying individuals. The principle of microaggregation is to aggregate original database records into small groups prior to publication. Each group should contain at least $k$ records to prevent disclosure of individual information, where $k$ is a constant value preset by the data protector. Recently, microaggregation has been shown to be useful to achieve $k$-anonymity, in addition to it being a good masking method. Optimal microaggregation (with minimum within-groups variability loss) can be computed in polynomial time for univariate data. Unfortunately, for multivariate data it is an NP-hard problem. Several heuristic approaches to microaggregation have been proposed in the literature. Heuristics yielding groups with fixed size $k$ tend to be more efficient, whereas data-oriented heuristics yielding variable group size tend to result in lower information loss. 
This paper presents new data-oriented heuristics which improve on the trade-off between computational complexity and information loss and are thus usable for large datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "anonymity; microaggregation; microdata protection; privacy; statistical databases; statistical disclosure control", remark = "Check month: April or November??", } @Article{Massacci:2006:HHD, author = "Fabio Massacci and John Mylopoulos and Nicola Zannone", title = "Hierarchical {Hippocratic} databases with minimal disclosure for virtual organizations", journal = j-VLDB-J, volume = "15", number = "4", pages = "370--387", month = nov, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The protection of customer privacy is a fundamental issue in today's corporate marketing strategies. Not surprisingly, many research efforts have proposed new privacy-aware technologies. Among them, Hippocratic databases offer mechanisms for enforcing privacy rules in database systems for inter-organizational business processes (also known as virtual organizations). This paper extends these mechanisms to allow for hierarchical purposes, distributed authorizations and minimal disclosure supporting the business processes of virtual organizations that want to offer their clients a number of ways to fulfill a service. Specifically, we use a goal-oriented approach to analyze privacy policies of the enterprises involved in a business process. On the basis of the purpose hierarchy derived through a goal refinement process, we provide algorithms for determining the minimum set of authorizations needed to achieve a service. 
This allows us to automatically derive access control policies for an inter-organizational business process from the collection of privacy policies associated with different participating enterprises. By using effective on-line algorithms, the derivation of such minimal information can also be done on-the-fly by the customer wishing to access a service.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access control; delegation; information security; minimal disclosure; privacy protection; private data management; virtual organizations", remark = "Check month: April or November??", } @Article{Xiong:2006:PLM, author = "Hui Xiong and Michael Steinbach and Vipin Kumar", title = "Privacy leakage in multi-relational databases: a semi-supervised learning perspective", journal = j-VLDB-J, volume = "15", number = "4", pages = "388--402", month = nov, year = "2006", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In multi-relational databases, a view, which is a context- and content-dependent subset of one or more tables (or other views), is often used to preserve privacy by hiding sensitive information. However, recent developments in data mining present a new challenge for database security even when traditional database security techniques, such as database access control, are employed. This paper presents a data mining framework using semi-supervised learning that demonstrates the potential for privacy leakage in multi-relational databases. Many different types of semi-supervised learning techniques, such as the K-nearest neighbor (KNN) method, can be used to demonstrate privacy leakage. 
However, we also introduce a new approach to semi-supervised learning, hyperclique pattern-based semi-supervised learning (HPSL), which differs from traditional semi-supervised learning approaches in that it considers the similarity among groups of objects instead of only pairs of objects. Our experimental results show that both the KNN and HPSL methods have the ability to compromise database security, although the HPSL is better at this privacy violation (has higher prediction accuracy) than the KNN method. Finally, we provide a principle for avoiding privacy leakage in multi-relational databases via semi-supervised learning and illustrate this principle with a simple preventive technique whose effectiveness is demonstrated by experiments.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", remark = "Check month: April or November??", } @Article{Haas:2006:SIB, author = "Laura M. Haas and Christian S. Jensen and Martin L. 
Kersten",
  title = "Special issue: best papers of {VLDB} 2005",
  journal = j-VLDB-J,
  volume = "16",
  number = "1",
  pages = "1--3",
  month = oct,
  year = "2006",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Wed Nov 15 06:36:12 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  remark = "Possible duplicate of Haas:2007:SIB (same volume, number, and pages). Check date: October 2006 or January 2007??",
}

%%% NOTE: The volume 16, number 1 entries keyed ...:2006:... repeat, with
%%% month = oct and year = 2006, the articles recorded again under
%%% ...:2007:... keys (month = jan, year = 2007) with identical authors,
%%% titles, and page ranges.  Each 2006 entry carries a remark naming its
%%% 2007 counterpart so that the pair can be reconciled.

@Article{Godfrey:2006:AAM,
  author = "Parke Godfrey and Ryan Shipley and Jarek Gryz",
  title = "Algorithms and analyses for maximal vector computation",
  journal = j-VLDB-J,
  volume = "16",
  number = "1",
  pages = "5--28",
  month = oct,
  year = "2006",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Wed Nov 15 06:36:12 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  remark = "Possible duplicate of Godfrey:2007:AAM (same volume, number, and pages). Check date: October 2006 or January 2007??",
}

@Article{Larson:2006:VMO,
  author = "Per-{\AA}ke Larson and Jingren Zhou",
  title = "View matching for outer-join views",
  journal = j-VLDB-J,
  volume = "16",
  number = "1",
  pages = "29--53",
  month = oct,
  year = "2006",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Wed Nov 15 06:36:12 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  remark = "Possible duplicate of Larson:2007:VMO (same volume, number, and pages). Check date: October 2006 or January 2007??",
}

@Article{Markl:2006:CSE,
  author = "V. Markl and P. J. Haas and M. Kutsch and N. Megiddo and U. Srivastava and T. M. Tran",
  title = "Consistent selectivity estimation via maximum entropy",
  journal = j-VLDB-J,
  volume = "16",
  number = "1",
  pages = "55--76",
  month = oct,
  year = "2006",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Wed Nov 15 06:36:12 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  remark = "Possible duplicate of Markl:2007:CSE (same volume, number, and pages). Check date: October 2006 or January 2007??",
}

@Article{Ghoting:2006:CCF,
  author = "Amol Ghoting and Gregory Buehrer and Srinivasan Parthasarathy and Daehyun Kim and Anthony Nguyen and Yen-Kuang Chen and Pradeep Dubey",
  title = "Cache-conscious frequent pattern mining on modern and emerging processors",
  journal = j-VLDB-J,
  volume = "16",
  number = "1",
  pages = "77--96",
  month = oct,
  year = "2006",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Wed Nov 15 06:36:12 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  remark = "Possible duplicate of Ghoting:2007:CCF (same volume, number, and pages). Check date: October 2006 or January 2007??",
}

@Article{Lee:2006:ETS,
  author = "Yoonkyong Lee and Mayssam Sayyadian and AnHai Doan and Arnon S.
Rosenthal",
  title = "{eTuner}: tuning schema matching software using synthetic scenarios",
  journal = j-VLDB-J,
  volume = "16",
  number = "1",
  pages = "97--122",
  month = oct,
  year = "2006",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Wed Nov 15 06:36:12 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  remark = "Possible duplicate of Lee:2007:ETS (same volume, number, and pages). Check date: October 2006 or January 2007??",
}

@Article{Burdick:2006:OUI,
  author = "Doug Burdick and Prasad M. Deshpande and T. S. Jayram and Raghu Ramakrishnan and Shivakumar Vaithyanathan",
  title = "{OLAP} over uncertain and imprecise data",
  journal = j-VLDB-J,
  volume = "16",
  number = "1",
  pages = "123--144",
  month = oct,
  year = "2006",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Wed Nov 15 06:36:12 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  remark = "Possible duplicate of Burdick:2007:OUI (same volume, number, and pages). Check date: October 2006 or January 2007??",
}

@Article{Haftmann:2006:FER,
  author = "Florian Haftmann and Donald Kossmann and Eric Lo",
  title = "A framework for efficient regression tests on database applications",
  journal = j-VLDB-J,
  volume = "16",
  number = "1",
  pages = "145--164",
  month = oct,
  year = "2006",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Wed Nov 15 06:36:12 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  remark = "Possible duplicate of Haftmann:2007:FER (same volume, number, and pages). Check date: October 2006 or January 2007??",
}

@Article{Haas:2007:SIB,
  author = "Laura M. Haas and Christian S. Jensen and Martin L. Kersten",
  title = "Special issue: best papers of {VLDB} 2005",
  journal = j-VLDB-J,
  volume = "16",
  number = "1",
  pages = "1--3",
  month = jan,
  year = "2007",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Mon Jun 23 10:51:22 MDT 2008",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Godfrey:2007:AAM,
  author = "Parke Godfrey and Ryan Shipley and Jarek Gryz",
  title = "Algorithms and analyses for maximal vector computation",
  journal = j-VLDB-J,
  volume = "16",
  number = "1",
  pages = "5--28",
  month = jan,
  year = "2007",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Mon Jun 23 10:51:22 MDT 2008",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract = "The maximal vector problem is to identify the maximals over a collection of vectors. This arises in many contexts and, as such, has been well studied. The problem recently gained renewed attention with skyline queries for relational databases and with work to develop skyline algorithms that are external and relationally well behaved. While many algorithms have been proposed, how they perform has been unclear. We study the performance of, and design choices behind, these algorithms. We prove runtime bounds based on the number of vectors $N$ and the dimensionality $K$. Early algorithms based on {\em divide and conquer\/} established seemingly good average and worst-case asymptotic runtimes. In fact, the problem can be solved in $ \mathcal {O}(K N)$ average-case (holding $K$ as fixed). We prove, however, that the performance is quite bad with respect to $K$. We demonstrate that the more recent skyline algorithms are better behaved, and can also achieve $ \mathcal {O}(K N)$ average-case. While $K$ matters for these, in practice, its effect vanishes in the asymptotic. We introduce a new external algorithm, LESS, that is more efficient and better behaved. We evaluate LESS's effectiveness and improvement over the field, and prove that its average-case running time is $ \mathcal {O}(K N)$.",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Larson:2007:VMO,
  author = "Per-{\AA}ke Larson and Jingren Zhou",
  title = "View matching for outer-join views",
  journal = j-VLDB-J,
  volume = "16",
  number = "1",
  pages = "29--53",
  month = jan,
  year = "2007",
  CODEN = "VLDBFR",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Mon Jun 23 10:51:22 MDT 2008",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract = "Prior work on computing queries from materialized views has focused on views defined by expressions consisting of selection, projection, and inner joins, with an optional aggregation on top (SPJG views). This paper provides a view matching algorithm for views that may also contain outer joins (SPOJG views). The algorithm relies on a normal form for outer-join expressions and is not based on bottom-up syntactic matching of expressions.
It handles any combination of inner and outer joins, deals correctly with SQL bag semantics, and exploits not-null constraints, uniqueness constraints and foreign key constraints.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "aggregation; materialized views; outer joins; query processing; view matching", } @Article{Markl:2007:CSE, author = "V. Markl and P. J. Haas and M. Kutsch and N. Megiddo and U. Srivastava and T. M. Tran", title = "Consistent selectivity estimation via maximum entropy", journal = j-VLDB-J, volume = "16", number = "1", pages = "55--76", month = jan, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:22 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Cost-based query optimizers need to estimate the selectivity of conjunctive predicates when comparing alternative query execution plans. To this end, advanced optimizers use multivariate statistics to improve information about the joint distribution of attribute values in a table. The joint distribution for all columns is almost always too large to store completely, and the resulting use of partial distribution information raises the possibility that multiple, non-equivalent selectivity estimates may be available for a given predicate. Current optimizers use cumbersome ad hoc methods to ensure that selectivities are estimated in a consistent manner. These methods ignore valuable information and tend to bias the optimizer toward query plans for which the least information is available, often yielding poor results. In this paper we present a novel method for consistent selectivity estimation based on the principle of maximum entropy (ME). 
Our method exploits all available information and avoids the bias problem. In the absence of detailed knowledge, the ME approach reduces to standard uniformity and independence assumptions. Experiments with our prototype implementation in DB2 UDB show that use of the ME approach can improve the optimizer's cardinality estimates by orders of magnitude, resulting in better plan quality and significantly reduced query execution times. For almost all queries, these improvements are obtained while adding only tens of milliseconds to the overall time required for query optimization.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ghoting:2007:CCF, author = "Amol Ghoting and Gregory Buehrer and Srinivasan Parthasarathy and Daehyun Kim and Anthony Nguyen and Yen-Kuang Chen and Pradeep Dubey", title = "Cache-conscious frequent pattern mining on modern and emerging processors", journal = j-VLDB-J, volume = "16", number = "1", pages = "77--96", month = jan, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:22 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Algorithms are typically designed to exploit the current state of the art in processor technology. However, as processor technology evolves, said algorithms are often unable to derive the maximum achievable performance on these modern architectures. In this paper, we examine the performance of frequent pattern mining algorithms on a modern processor. A detailed performance study reveals that even the best frequent pattern mining implementations, with highly efficient memory managers, still grossly under-utilize a modern processor. 
The primary performance bottlenecks are {\em poor data locality\/} and {\em low instruction level parallelism (ILP)}. We propose a {\em cache-conscious prefix tree\/} to address this problem. The resulting tree improves spatial locality and also enhances the benefits from hardware cache line prefetching. Furthermore, the design of this data structure allows the use of {\em path tiling}, a novel tiling strategy, to improve temporal locality. The result is an overall speedup of up to 3.2 when compared with state of the art implementations. We then show how these algorithms can be improved further by realizing a non-naive thread-based decomposition that targets {\em simultaneously multi-threaded processors (SMT)}. A key aspect of this decomposition is to ensure cache re-use between threads that are co-scheduled at a fine granularity. This optimization affords an additional speedup of 50\%, resulting in an overall speedup of up to 4.8. The proposed optimizations also provide performance improvements on SMPs, and will most likely be beneficial on emerging processors.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "architecture-conscious algorithms; association rule mining; cache-conscious data mining; frequent itemset mining; frequent pattern mining", } @Article{Lee:2007:ETS, author = "Yoonkyong Lee and Mayssam Sayyadian and AnHai Doan and Arnon S. 
Rosenthal", title = "{eTuner}: tuning schema matching software using synthetic scenarios", journal = j-VLDB-J, volume = "16", number = "1", pages = "97--122", month = jan, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:22 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Most recent schema matching systems assemble {\em multiple components}, each employing a particular matching technique. The domain user must then {\em tune\/} the system: select the right component to be executed and correctly adjust their numerous ``knobs'' (e.g., thresholds, formula coefficients). Tuning is skill and time intensive, but (as we show) without it the matching accuracy is significantly inferior. We describe eTuner, an approach to {\em automatically\/} tune schema matching systems. Given a schema $S$, we match $S$ against synthetic schemas, for which the ground truth mapping is known, and find a tuning that demonstrably improves the performance of matching $S$ against real schemas. To efficiently search the huge space of tuning configurations, eTuner works sequentially, starting with tuning the lowest level components. To increase the applicability of eTuner, we develop methods to tune a broad range of matching components. While the tuning process is completely automatic, eTuner can also exploit user assistance (whenever available) to further improve the tuning quality. We employed eTuner to tune four recently developed matching systems on several real-world domains. 
The results show that eTuner produced tuned matching systems that achieve higher accuracy than using the systems with currently possible tuning methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "compositional approach; machine learning; schema matching; synthetic schemas; tuning", } @Article{Burdick:2007:OUI, author = "Doug Burdick and Prasad M. Deshpande and T. S. Jayram and Raghu Ramakrishnan and Shivakumar Vaithyanathan", title = "{OLAP} over uncertain and imprecise data", journal = j-VLDB-J, volume = "16", number = "1", pages = "123--144", month = jan, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:22 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We extend the OLAP data model to represent data ambiguity, specifically imprecision and uncertainty, and introduce an allocation-based approach to the semantics of aggregation queries over such data. We identify three natural query properties and use them to shed light on alternative query semantics. 
While there is much work on representing and querying ambiguous data, to our knowledge this is the first paper to handle both imprecision and uncertainty in an OLAP setting.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "aggregation; ambiguous; imprecision; uncertainty", } @Article{Haftmann:2007:FER, author = "Florian Haftmann and Donald Kossmann and Eric Lo", title = "A framework for efficient regression tests on database applications", journal = j-VLDB-J, volume = "16", number = "1", pages = "145--164", month = jan, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:22 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Regression testing is an important software maintenance activity to ensure the integrity of a software after modification. However, most methods and tools developed for software testing today do not work well for database applications; these tools only work well if applications are stateless or tests can be designed in such a way that they do not alter the state. To execute tests for database applications efficiently, the challenge is to control the state of the database during testing and to order the test runs such that expensive database {\em reset\/} operations that bring the database into the right state need to be executed as seldom as possible. This work devises a regression testing framework for database applications so that test runs can be executed in parallel. The goal is to achieve linear speed-up and/or exploit the available resources as well as possible. This problem is challenging because parallel testing needs to consider both load balancing and controlling the state of the database. 
Experimental results show that test run execution can achieve linear speed-up by using the proposed framework.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database applications; regression tests", } @Article{Tanin:2007:UDQ, author = "Egemen Tanin and Aaron Harwood and Hanan Samet", title = "Using a distributed quadtree index in peer-to-peer networks", journal = j-VLDB-J, volume = "16", number = "2", pages = "165--178", month = apr, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:23 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Peer-to-peer (P2P) networks have become a powerful means for online data exchange. Currently, users are primarily utilizing these networks to perform exact-match queries and retrieve complete files. However, future more data intensive applications, such as P2P auction networks, P2P job-search networks, P2P multiplayer games, will require the capability to respond to more complex queries such as range queries involving numerous data types including those that have a spatial component. In this paper, a distributed quadtree index that adapts the MX-CIF quadtree is described that enables more powerful accesses to data in P2P networks. This index has been implemented for various prototype P2P applications and results of experiments are presented. Our index is easy to use, scalable, and exhibits good load-balancing properties. 
Similar indices can be constructed for various multidimensional data types with both spatial and non-spatial components.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "distributed data structures; peer-to-peer networks; quadtrees; spatial data structures", } @Article{Viqueira:2007:SES, author = "Jose R. Rios Viqueira and Nikos A. Lorentzos", title = "{SQL} extension for spatio-temporal data", journal = j-VLDB-J, volume = "16", number = "2", pages = "179--200", month = apr, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:23 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "An SQL extension is formalized for the management of spatio-temporal data, i.e. of spatial data that evolves with respect to time. The extension is dedicated to applications such as topography, cartography, and cadastral systems, hence it considers discrete changes both in space and in {\em time}. It is based on the rigid formalization of data types and of SQL constructs. Data types are defined in terms of time and {\em spatial quanta}. The SQL constructs are defined in terms of a kernel of {\em few\/} relational algebra operations, composed of the well-known operations of the 1NF model and of two more, {\em Unfold\/} and {\em Fold}. In conjunction with previous work, it enables the uniform management of 1NF structures that may contain not only spatio-temporal but also either purely temporal or purely spatial or conventional data. 
The syntax and semantics of the extension is fully consistent with the {SQL:2003} standard.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data modelling; spatial databases; spatio-temporal databases; SQL", } @Article{Dai:2007:CDC, author = "Bi-Ru Dai and Cheng-Ru Lin and Ming-Syan Chen", title = "Constrained data clustering by depth control and progressive constraint relaxation", journal = j-VLDB-J, volume = "16", number = "2", pages = "201--217", month = apr, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:23 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In order to import the domain knowledge or application-dependent parameters into the data mining systems, constraint-based mining has attracted a lot of research attention recently. In this paper, the attributes employed to model the constraints are called constraint attributes and those attributes involved in the objective function to be optimized are called optimization attributes. The constrained clustering considered in this paper is conducted in such a way that the objective function of optimization attributes is optimized subject to the condition that the imposed constraint is satisfied. Explicitly, we address the problem of constrained clustering with numerical constraints, in which the constraint attribute values of any two data items in the same cluster are required to be within the corresponding constraint range. This numerical constrained clustering problem, however, cannot be dealt with by any conventional clustering algorithms. Consequently, we devise several effective and efficient algorithms to solve such a clustering problem. 
It is noted that due to the intrinsic nature of the numerical constrained clustering, there is an order dependency on the process of attaining the clustering, which in many cases degrades the clustering results. In view of this, we devise a {\em progressive constraint relaxation\/} technique to remedy this drawback and improve the overall performance of clustering results. Explicitly, by using a smaller (tighter) constraint range in earlier iterations of merge, we will have more room to relax the constraint and seek for better solutions in subsequent iterations. It is empirically shown that the progressive constraint relaxation technique is able to improve not only the execution efficiency but also the clustering quality.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "constrained clustering; data clustering; data mining", } @Article{Shen:2007:ADD, author = "Heng Tao Shen and Xiaofang Zhou and Aoying Zhou", title = "An adaptive and dynamic dimensionality reduction method for high-dimensional indexing", journal = j-VLDB-J, volume = "16", number = "2", pages = "219--234", month = apr, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:23 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The notorious ``dimensionality curse'' is a well-known phenomenon for any multi-dimensional indexes attempting to scale up to high dimensions. One well-known approach to overcome degradation in performance with respect to increasing dimensions is to reduce the dimensionality of the original dataset before constructing the index. However, identifying the correlation among the dimensions and effectively reducing them are challenging tasks. 
In this paper, we present an adaptive {\em Multi-level Mahalanobis-based Dimensionality Reduction\/} (MMDR) technique for high-dimensional indexing. Our MMDR technique has four notable features compared to existing methods. First, it discovers elliptical clusters for more effective dimensionality reduction by using only the low-dimensional subspaces. Second, data points in the different axis systems are indexed using a single $ B^+$-tree. Third, our technique is highly scalable in terms of data size and dimension. Finally, it is also dynamic and adaptive to insertions. An extensive performance study was conducted using both real and synthetic datasets, and the results show that our technique not only achieves higher precision, but also enables queries to be processed efficiently.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "correlated clustering; dimensionality reduction; high-dimensional indexing; projection; subspace", } @Article{He:2007:PCC, author = "Zhen He and Alonso Marquez", title = "Path and cache conscious prefetching {(PCCP)}", journal = j-VLDB-J, volume = "16", number = "2", pages = "235--249", month = apr, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:23 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Main memory cache performance continues to play an important role in determining the overall performance of object-oriented, object-relational and XML databases. An effective method of improving main memory cache performance is to prefetch or pre-load pages in advance to their usage, in anticipation of main memory cache misses. In this paper we describe a framework for creating prefetching algorithms with the novel features of path and cache consciousness. 
Path consciousness refers to the use of short sequences of object references at key points in the reference trace to identify paths of navigation. Cache consciousness refers to the use of historical page access knowledge to guess which pages are likely to be main memory cache resident most of the time and then assumes these pages do not exist in the context of prefetching. We have conducted a number of experiments comparing our approach against four highly competitive prefetching algorithms. The results show that our approach outperforms existing prefetching techniques in some situations while performing worse in others. We provide guidelines as to when our algorithm should be used and when others may be more desirable.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "caching; clustering; databases; prefetching", } @Article{Yu:2007:MBS, author = "Hailing Yu and Divyakant Agrawal and Amr {El Abbadi}", title = "{MEMS} based storage architecture for relational databases", journal = j-VLDB-J, volume = "16", number = "2", pages = "251--268", month = apr, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:23 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Due to recent advances in semiconductor manufacturing, the gap between main memory and disks is constantly increasing. This leads to a significant performance bottleneck for Relational Database Management Systems. Recent advances in nanotechnology have led to the invention of MicroElectroMechanical Systems (MEMS) based storage technology to replace disks. 
In this paper, we exploit the physical characteristics of MEMS-based storage devices to develop a placement scheme for relational data that enables retrieval in both row-wise and column-wise manner. We develop algorithms for different relational operations based on this data layout. Our experimental results and analysis demonstrate that this data layout not only improves I/O utilization, but results in better cache performance for a variety of different relational operations.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data placement; MEMS; relational databases; storage", } @Article{Yiannis:2007:CTF, author = "John Yiannis and Justin Zobel", title = "Compression techniques for fast external sorting", journal = j-VLDB-J, volume = "16", number = "2", pages = "269--291", month = apr, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:23 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "External sorting of large files of records involves use of disk space to store temporary files, processing time for sorting, and transfer time between CPU, cache, memory, and disk. Compression can reduce disk and transfer costs, and, in the case of external sorts, cut merge costs by reducing the number of runs. It is therefore plausible that overall costs of external sorting could be reduced through use of compression. In this paper, we propose new compression techniques for data consisting of sets of records. The best of these techniques, based on building a trie of variable-length common strings, provides fast compression and decompression and allows random access to individual records. 
We show experimentally that our trie-based compression leads to significant reduction in sorting costs; that is, it is faster to compress the data, sort it, and then decompress it than to sort the uncompressed data. While the degree of compression is not quite as great as can be obtained with adaptive techniques such as Lempel--Ziv methods, these cannot be applied to sorting. Our experiments show that, in comparison to approaches such as Huffman coding of fixed-length substrings, our novel trie-based method is faster and provides greater size reductions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "external sorting; query evaluation; semi-static compression; sorting", } @Article{Jermaine:2007:PEF, author = "Christopher Jermaine and Edward Omiecinski and Wai Gen Yee", title = "The partitioned exponential file for database storage management", journal = j-VLDB-J, volume = "16", number = "4", pages = "417--437", month = oct, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:25 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The rate of increase in hard disk storage capacity continues to outpace the rate of decrease in hard disk seek time. This trend implies that the value of a seek is increasing exponentially relative to the value of storage.\par With this trend in mind, we introduce the partitioned exponential file (PE file) which is a generic storage manager that can be customized for many different types of data (e.g., numerical, spatial, or temporal). The PE file is intended for use in environments with intense update loads and concurrent, analytic queries. 
Such an environment may be found, for example, in long-running scientific applications which can produce petabytes of data. For example, the proposed Large Synoptic Survey Telescope [36] will produce 50--100 petabytes of observational, scientific data over its multi-year lifetime. This database will never be taken off-line, so bursty update loads of tens of terabytes per day must be handled concurrently with data analysis. In the PE file, data are organized as a series of on-disk sorts with a careful, global organization. Because the PE file relies heavily on sequential I/O, only a fraction of a disk seek is required for a typical record insertion or retrieval.\par In addition to describing the PE file, we also detail a set of benchmarking experiments for T1SM, which is a PE file customized for use with multi-attribute data records ordered on a single numerical attribute. In our benchmarking, we implement and test many competing data organizations that can be used to index and store such data, such as the B+-Tree, the LSM-Tree, the Buffer Tree, the Stepped Merge Method, and the Y-Tree. As expected, no organization is the best over all benchmarks, but our experiments show that T1SM is the best choice in many situations, suggesting that it is the best overall. Specifically, T1SM performs exceptionally well in the case of a heavy query workload that must be handled concurrently with an intense insertion stream. 
Our experiments show that T1SM (and its close cousin, the T2SM storage manager for spatial data) can handle very heavy mixed workloads of this type, and still maintain acceptably small query latencies.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "data warehousing; indexing; storage management", } @Article{Deligiannakis:2007:DCH, author = "Antonios Deligiannakis and Yannis Kotidis and Nick Roussopoulos", title = "Dissemination of compressed historical information in sensor networks", journal = j-VLDB-J, volume = "16", number = "4", pages = "439--461", month = oct, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:25 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Sensor nodes are small devices that `measure' their environment and communicate feeds of low-level data values to a base station for further processing and archiving. Dissemination of these multi-valued feeds is challenging because of the limited resources (processing, bandwidth, energy) available in the nodes of the network. In this paper, we first describe the SBR algorithm for compressing multi-valued feeds containing historical data from each sensor. The key to our technique is the base signal, a series of values extracted from the real measurements that is used to provide piece-wise approximation of the measurements. While our basic technique exploits correlations among measurements taken on a single node, we further show how it can be adapted to exploit correlations among multiple nodes in a localized setting. Sensor nodes may form clusters and, within a cluster, a group leader identifies and coalesces similar measurements taken by different nodes. 
This localized mode of operation further improves the accuracy of the approximation, typically by a factor from 5 to 15. We provide detailed experiments of our algorithms and make direct comparisons against standard approximation techniques like Wavelets, Histograms and the Discrete Cosine Transform, on a variety of error metrics and for real data sets from different domains.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "compression; sensor networks", } @Article{Bohm:2007:FRA, author = "Klemens B{\"o}hm and Erik Buchmann", title = "Free riding-aware forwarding in {Content-Addressable Networks}", journal = j-VLDB-J, volume = "16", number = "4", pages = "463--482", month = oct, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:25 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Research on P2P data structures has tacitly assumed that peers readily participate in the work, i.e., are cooperative. But such participation is voluntary, and free riding is the dominant strategy. This article describes a protocol that renders free riding unattractive, for one particular P2P data structure. The protocol is based on feedback that adjacent nodes exchange. This induces transitive logical networks of nodes that rule out uncooperative peers. The protocol uses proofs of work to deter free riding. To show that cooperative behavior dominates, we have come up with a cost model that quantifies the overall cost of peers, depending on their degree of cooperativeness and many other parameters. The cost model tells us that we can achieve a good discrimination against peers that are less cooperative, with moderate additional cost for cooperative peers. 
Extensive experiments confirm the validity of our approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "distributed hashtables; free riding; incentives; peer-to-peer; reputation", } @Article{Traina:2007:OFA, author = "Caetano {Traina, Jr.} and Roberto F. Filho and Agma J. Traina and Marcos R. Vieira and Christos Faloutsos", title = "The {Omni-family} of all-purpose access methods: a simple and effective way to make similarity search more efficient", journal = j-VLDB-J, volume = "16", number = "4", pages = "483--505", month = oct, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:25 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Similarity search operations require executing expensive algorithms, and although broadly useful in many new applications, they rely on specific structures not yet supported by commercial DBMS. In this paper we discuss the new Omni-technique, which allows to build a variety of dynamic Metric Access Methods based on a number of selected objects from the dataset, used as global reference objects. We call them as the Omni-family of metric access methods. This technique enables building similarity search operations on top of existing structures, significantly improving their performance, regarding the number of disk access and distance calculations. 
Additionally, our methods scale up well, exhibiting sub-linear behavior with growing database size.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "index structures; metric access methods; multimedia databases; similarity search", } @Article{Khan:2007:NID, author = "Latifur Khan and Mamoun Awad and Bhavani Thuraisingham", title = "A new intrusion detection system using support vector machines and hierarchical clustering", journal = j-VLDB-J, volume = "16", number = "4", pages = "507--521", month = oct, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:25 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Whenever an intrusion occurs, the security and value of a computer system is compromised. Network-based attacks make it difficult for legitimate users to access various network services by purposely occupying or sabotaging network resources and services. This can be done by sending large amounts of network traffic, exploiting well-known faults in networking services, and by overloading network hosts. Intrusion Detection attempts to detect computer attacks by examining various data records observed in processes on the network and it is split into two groups, anomaly detection systems and misuse detection systems. Anomaly detection is an attempt to search for malicious behavior that deviates from established normal patterns. Misuse detection is used to identify intrusions that match known attack scenarios. Our interest here is in anomaly detection and our proposed method is a scalable solution for detecting network-based anomalies. We use Support Vector Machines (SVM) for classification. 
The SVM is one of the most successful classification algorithms in the data mining area, but its long training time limits its use. This paper presents a study for enhancing the training time of SVM, specifically when dealing with large data sets, using hierarchical clustering analysis. We use the Dynamically Growing Self-Organizing Tree (DGSOT) algorithm for clustering because it has proved to overcome the drawbacks of traditional hierarchical clustering algorithms (e.g., hierarchical agglomerative clustering). Clustering analysis helps find the boundary points, which are the most qualified data points to train SVM, between two classes. We present a new approach of combination of SVM and DGSOT, which starts with an initial training set and expands it gradually using the clustering structure produced by the DGSOT algorithm. We compare our approach with the Rocchio Bundling technique and random selection in terms of accuracy loss and training time gain using a single benchmark real data set. We show that our proposed variations contribute significantly in improving the training process of SVM with high generalization accuracy and outperform the Rocchio Bundling technique.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Dalvi:2007:EQE, author = "Nilesh Dalvi and Dan Suciu", title = "Efficient query evaluation on probabilistic databases", journal = j-VLDB-J, volume = "16", number = "4", pages = "523--544", month = oct, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:25 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We describe a framework for supporting arbitrarily complex SQL queries with `uncertain' predicates. 
The query semantics is based on a probabilistic model and the results are ranked, much like in Information Retrieval. Our main focus is query evaluation. We describe an optimization algorithm that can compute efficiently most queries. We show, however, that the data complexity of some queries is \#P-complete, which implies that these queries do not admit any efficient evaluation methods. For these queries we describe both an approximation algorithm and a Monte-Carlo simulation algorithm.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Croft:2008:ISI, author = "W. Bruce Croft and Hans-J. Schek", title = "Introduction to the special issue on database and information retrieval integration", journal = j-VLDB-J, volume = "17", number = "1", pages = "1--3", month = jan, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:26 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Roelleke:2008:MRM, author = "Thomas Roelleke and Hengzhi Wu and Jun Wang and Hany Azzam", title = "Modelling retrieval models in a probabilistic relational algebra with a new operator: the relational {Bayes}", journal = j-VLDB-J, volume = "17", number = "1", pages = "5--37", month = jan, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:26 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper presents a probabilistic relational modelling (implementation) of the major probabilistic 
retrieval models. Such a high-level implementation is useful since it supports the ranking of any object, it allows for the reasoning across structured and unstructured data, and it gives the software (knowledge) engineer control over ranking and thus supports customisation. The contributions of this paper include the specification of probabilistic SQL (PSQL) and probabilistic relational algebra (PRA), a new relational operator for probability estimation (the relational Bayes), the probabilistic relational modelling of retrieval models, a comparison of modelling retrieval with traditional SQL versus modelling retrieval with PSQL, and a comparison of the performance of probability estimation with traditional SQL versus PSQL. The main findings are that the PSQL/PRA paradigm allows for the description of advanced retrieval models, is suitable for solving large-scale retrieval tasks, and outperforms traditional SQL in terms of abstraction and performance regarding probability estimation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "DB + IR integration; probabilistic databases; probabilistic relational modelling; retrieval models", } @Article{Schmitt:2008:QDQ, author = "Ingo Schmitt", title = "{QQL}: {A DB\&IR Query Language}", journal = j-VLDB-J, volume = "17", number = "1", pages = "39--56", month = jan, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:26 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Traditional database query languages are based on set theory and crisp first order logic. However, many applications require retrieval-like queries which return result objects associated with a degree of being relevant to the query. 
Historically, retrieval systems estimate relevance by exploiting hidden object semantics whereas query processing in database systems relies on matching select-conditions with attribute values. Thus, different mechanisms were developed for database and information retrieval systems. In consequence, there is a lack of support for queries involving both retrieval and database search terms. In this work, we introduce the quantum query language (QQL). Its underlying unifying theory is based on the mathematical formalism of quantum mechanics and quantum logic. Van Rijsbergen already discussed the strong relation between the formalism of quantum mechanics and information retrieval. In this work, we interrelate concepts from database query processing to concepts from quantum mechanics and logic. As result, we obtain a common theory which allows us to incorporate seamlessly retrieval search into traditional database query processing.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database query language; DB \& IR; information retrieval", } @Article{Lau:2008:MRM, author = "Ho Lam Lau and Wilfred Ng", title = "A multi-ranker model for adaptive {XML} searching", journal = j-VLDB-J, volume = "17", number = "1", pages = "57--80", month = jan, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:26 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The evolution of computing technology suggests that it has become more feasible to offer access to Web information in a ubiquitous way, through various kinds of interaction devices such as PCs, laptops, palmtops, and so on. 
As XML has become a de-facto standard for exchanging Web data, an interesting and practical research problem is the development of models and techniques to satisfy various needs and preferences in searching XML data. In this paper, we employ a list of simple XML tagged keywords as a vehicle for searching XML fragments in a collection of XML documents. In order to deal with the diversified nature of XML documents as well as user preferences, we propose a novel multi-ranker model (MRM), which is able to abstract a spectrum of important XML properties and adapt the features to different XML search needs. The MRM is composed of three ranking levels. The lowest level consists of two categories of similarity and granularity features. At the intermediate level, we define four tailored XML rankers (XRs), which consist of different lower level features and have different strengths in searching XML fragments. The XRs are trained via a learning mechanism called the Ranking Support Vector Machine in a voting Spy Na{\"\i}ve Bayes framework (RSSF). The RSSF takes as input a set of labeled fragments and feature vectors and generates as output Adaptive Rankers (ARs) in the learning process. The ARs are defined over the XRs and generated at the top level of the MRM. We show empirically that the RSSF is able to improve the MRM significantly in the learning process that needs only a small set of training XML fragments. 
We demonstrate that the trained MRM is able to bring out the strengths of the XRs in order to adapt different preferences and queries.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Theobald:2008:TEV, author = "Martin Theobald and Holger Bast and Debapriyo Majumdar and Ralf Schenkel and Gerhard Weikum", title = "{TopX}: efficient and versatile top-$k$ query processing for semistructured data", journal = j-VLDB-J, volume = "17", number = "1", pages = "81--115", month = jan, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:26 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Recent IR extensions to XML query languages such as XPath 1.0 Full-Text or the NEXI query language of the INEX benchmark series reflect the emerging interest in IR-style ranked retrieval over semistructured data. TopX is a top-$k$ retrieval engine for text and semistructured data. It terminates query execution as soon as it can safely determine the $k$ top-ranked result elements according to a monotonic score aggregation function with respect to a multidimensional query. It efficiently supports vague search on both content- and structure-oriented query conditions for dynamic query relaxation with controllable influence on the result ranking. 
The main contributions of this paper unfold into four main points: (1) fully implemented models and algorithms for ranked XML retrieval with XPath Full-Text functionality, (2) efficient and effective top-$k$ query processing for semistructured data, (3) support for integrating thesauri and ontologies with statistically quantified relationships among concepts, leveraged for word-sense disambiguation and query expansion, and (4) a comprehensive description of the TopX system, with performance experiments on large-scale corpora like TREC TeraByte and INEX Wikipedia.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "content- and structure-aware ranking; cost-based index access scheduling; DB{\&}IR integration; dynamic query expansion; efficient XML full-text search; probabilistic candidate pruning; top-$k$ query processing",
}

@Article{Simitsis:2008:PUK,
  author =       "Alkis Simitsis and Georgia Koutrika and Yannis Ioannidis",
  title =        "Pr{\'e}cis: from unstructured keywords as queries to structured databases as answers",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "1",
  pages =        "117--149",
  month =        jan,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:26 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Pr{\'e}cis queries represent a novel way of accessing data, which combines ideas and techniques from the fields of databases and information retrieval. They are free-form, keyword-based, queries on top of relational databases that generate entire multi-relation databases, which are logical subsets of the original ones.
A logical subset contains not only items directly related to the given query keywords but also items implicitly related to them in various ways, with the purpose of providing to the user much greater insight into the original data. In this paper, we lay the foundations for the concept of logical database subsets that are generated from pr{\'e}cis queries under a generalized perspective that removes several restrictions of previous work. In particular, we extend the semantics of pr{\'e}cis queries considering that they may contain multiple terms combined through the AND, OR, and NOT operators. On the basis of these extended semantics, we define the concept of a logical database subset, we identify the one that is most relevant to a given query, and we provide algorithms for its generation. Finally, we present an extensive set of experimental results that demonstrate the efficiency and benefits of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "free-form queries; keyword search; query processing",
}

@Article{Cornacchia:2008:FEI,
  author =       "Roberto Cornacchia and S{\'a}ndor H{\'e}man and Marcin Zukowski and Arjen P. de Vries and Peter Boncz",
  title =        "Flexible and efficient {IR} using array databases",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "1",
  pages =        "151--168",
  month =        jan,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:26 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The Matrix Framework is a recent proposal by Information Retrieval (IR) researchers to flexibly represent information retrieval models and concepts in a single multi-dimensional array framework.
We provide computational support for exactly this framework with the array database system SRAM (Sparse Relational Array Mapping), that works on top of a DBMS. Information retrieval models can be specified in its comprehension-based array query language, in a way that directly corresponds to the underlying mathematical formulas. SRAM efficiently stores sparse arrays in (compressed) relational tables and translates and optimizes array queries into relational queries. In this work, we describe a number of array query optimization rules. To demonstrate their effect on text retrieval, we apply them in the TREC TeraByte track (TREC-TB) efficiency task, using the Okapi BM25 model as our example. It turns out that these optimization rules enable SRAM to automatically translate the BM25 array queries into the relational equivalent of inverted list processing including compression, score materialization and quantization, such as employed by custom-built IR systems. The use of the high-performance MonetDB/X100 relational backend, that provides transparent database compression, allows the system to achieve very fast response times with good precision and low resource usage.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "array databases; database compression; information retrieval; query optimization",
}

@Article{Lockemann:2008:MKR,
  author =       "Peter C. Lockemann",
  title =        "In memoriam {Klaus R.
Dittrich} (1950--2007)",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "169--170",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Alonso:2008:GEM,
  author =       "Gustavo Alonso and David Lomet and Umesh Dayal",
  title =        "Guest {Editors}' message",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "171--172",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Gemulla:2008:MBS,
  author =       "Rainer Gemulla and Wolfgang Lehner and Peter J. Haas",
  title =        "Maintaining bounded-size sample synopses of evolving datasets",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "173--201",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Perhaps the most flexible synopsis of a database is a uniform random sample of the data; such samples are widely used to speed up processing of analytic queries and data-mining tasks, enhance query optimization, and facilitate information integration.
The ability to bound the maximum size of a sample can be very convenient from a system-design point of view, because the task of memory management is simplified, especially when many samples are maintained simultaneously. In this paper, we study methods for incrementally maintaining a bounded-size uniform random sample of the items in a dataset in the presence of an arbitrary sequence of insertions and deletions. For `stable' datasets whose size remains roughly constant over time, we provide a novel sampling scheme, called `random pairing' (RP), that maintains a bounded-size uniform sample by using newly inserted data items to compensate for previous deletions. The RP algorithm is the first extension of the 45-year-old reservoir sampling algorithm to handle deletions; RP reduces to the `passive' algorithm of Babcock et al. when the insertions and deletions correspond to a moving window over a data stream. Experiments show that, when dataset-size fluctuations over time are not too extreme, RP is the algorithm of choice with respect to speed and sample-size stability. For `growing' datasets, we consider algorithms for periodically resizing a bounded-size random sample upwards. We prove that any such algorithm cannot avoid accessing the base data, and provide a novel resizing algorithm that minimizes the time needed to increase the sample size. We also show how to merge uniform samples from disjoint datasets to obtain a uniform sample of the union of the datasets; the merged sample can be incrementally maintained. 
Our new RPMerge algorithm extends the HRMerge algorithm of Brown and Haas to effectively deal with deletions, thereby facilitating efficient parallel sampling.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "database sampling; reservoir sampling; sample maintenance; synopsis",
}

@Article{Yu:2008:XSR,
  author =       "Cong Yu and H. V. Jagadish",
  title =        "{XML} schema refinement through redundancy detection and normalization",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "203--223",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "As XML becomes increasingly popular, XML schema design has become an increasingly important issue. One of the central objectives of good schema design is to avoid data redundancies: redundantly stored information can lead not just only to a higher data storage cost but also to increased costs for data transfer and data manipulation. Furthermore, such data redundancies can lead to potential update anomalies, rendering the database inconsistent. One strategy to avoid data redundancies is to design redundancy-free schema from the start on the basis of known functional dependencies. We observe that XML databases are often `casually designed' and XML FDs may not be determined in advance. Under such circumstances, discovering XML data redundancies from the data itself becomes necessary and is an integral part of the schema refinement (or re-design) process. We present the design and implementation of the first system, DiscoverXFD, for efficient discovery of XML data redundancies. It employs a novel XML data structure and introduces a new class of partition-based algorithms.
The XML data redundancies are defined on the basis of a new notion of XML functional dependency (XML FD) that (1) extends previous notions by incorporating set elements into the XML FD specification, and (2) maintains tuple-based semantics through the novel concept of Generalized Tree Tuple (GTT). Using this comprehensive XML FD notion, we introduce a new normal form (GTT-XNF) for XML documents, and provide comprehensive comparisons with previous studies. Given the set of data redundancies (in the form of redundancy-indicating XML FDs) discovered by DiscoverXFD, we describe a normalization algorithm for converting any original XML schema into one in GTT-XNF.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "data redundancy; functional dependency; normal form; schema design; XML",
}

@Article{Mitra:2008:TKS,
  author =       "Soumyadeb Mitra and Marianne Winslett and Windsor W. Hsu and Kevin Chen-Chuan Chang",
  title =        "Trustworthy keyword search for compliance storage",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "225--242",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Intense regulatory focus on secure retention of electronic records has led to a need to ensure that records are trustworthy, i.e., able to provide irrefutable proof and accurate details of past events. In this paper, we analyze the requirements for a trustworthy index to support keyword-based search queries. We argue that trustworthy index entries must be durable---the index must be updated when new documents arrive, and not periodically deleted and rebuilt.
To this end, we propose a scheme for efficiently updating an inverted index, based on judicious merging of the posting lists of terms. Through extensive simulations and experiments with two real world data sets and workloads, we demonstrate that the scheme achieves online update speed while maintaining good query performance. We also present and evaluate jump indexes, a novel trustworthy and efficient index for join operations on posting lists for multi-keyword queries. Jump indexes support insert, lookup and range queries in time logarithmic in the number of indexed documents.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "compliance storage; inverted index; jump index",
}

@Article{Benjelloun:2008:DUL,
  author =       "Omar Benjelloun and Anish Das Sarma and Alon Halevy and Martin Theobald and Jennifer Widom",
  title =        "Databases with uncertainty and lineage",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "243--264",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper introduces ULDBs, an extension of relational databases with simple yet expressive constructs for representing and manipulating both lineage and uncertainty. Uncertain data and data lineage are two important areas of data management that have been considered extensively in isolation, however many applications require the features in tandem. Fundamentally, lineage enables simple and consistent representation of uncertain data, it correlates uncertainty in query results with uncertainty in the input data, and query processing with lineage and uncertainty together presents computational benefits over treating them separately.
We show that the ULDB representation is complete, and that it permits straightforward implementation of many relational operations. We define two notions of ULDB minimality---data-minimal and lineage-minimal---and study minimization of ULDB representations under both notions. With lineage, derived relations are no longer self-contained: their uncertainty depends on uncertainty in the base data. We provide an algorithm for the new operation of extracting a database subset in the presence of interconnected uncertainty. We also show how ULDBs enable a new approach to query processing in probabilistic databases. Finally, we describe the current state of the Trio system, our implementation of ULDBs under development at Stanford.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "lineage; probabilistic data management; provenance; uncertainty in databases",
}

@Article{Jeffery:2008:ARM,
  author =       "Shawn R. Jeffery and Michael J. Franklin and Minos Garofalakis",
  title =        "An adaptive {RFID} middleware for supporting metaphysical data independence",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "265--289",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Sensor devices produce data that are unreliable, low-level, and seldom able to be used directly by applications. In this paper, we propose metaphysical data independence (MDI), a layer of independence that shields applications from the challenges that arise when interacting directly with sensor devices.
The key philosophy behind MDI is that applications do not deal with any aspect of physical device data, but rather interface with a high-level reconstruction of the physical world created by a sensor infrastructure. As a concrete instantiation of MDI in such a sensor infrastructure, we detail MDI-SMURF, a Radio Frequency Identification (RFID) middleware system that alleviates issues associated with using RFID data through adaptive techniques based on a novel statistical framework.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "data cleaning; RFID technology; sensor-based applications; statistical sampling",
}

@Article{Parreira:2008:JAP,
  author =       "Josiane Xavier Parreira and Carlos Castillo and Debora Donato and Sebastian Michel and Gerhard Weikum",
  title =        "The {Juxtaposed} approximate {PageRank} method for robust {PageRank} approximation in a peer-to-peer web search network",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "291--313",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0057-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat May 8 18:33:08 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We present Juxtaposed approximate PageRank (JXP), a distributed algorithm for computing PageRank-style authority scores of Web pages on a peer-to-peer (P2P) network. Unlike previous algorithms, JXP allows peers to have overlapping content and requires no a priori knowledge of other peers' content. Our algorithm combines locally computed authority scores with information obtained from other peers by means of random meetings among the peers in the network.
This computation is based on a Markov-chain state-lumping technique, and iteratively approximates global authority scores. The algorithm scales with the number of peers in the network and we show that the JXP scores converge to the true PageRank scores that one would obtain with a centralized algorithm. Finally, we show how to deal with misbehaving peers by extending JXP with a reputation model.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "link analysis; Markov chain aggregation; peer-to-peer systems; social reputation; Web graph",
}

@Article{Narayanan:2008:DAQ,
  author =       "Dushyanth Narayanan and Austin Donnelly and Richard Mortier and Antony Rowstron",
  title =        "Delay aware querying with {Seaweed}",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "315--331",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Large highly distributed data sets are poorly supported by current query technologies. Applications such as endsystem-based network management are characterized by data stored on large numbers of endsystems, with frequent local updates and relatively infrequent global one-shot queries. The challenges are scale ($ 10^3 $ to $ 10^9 $ endsystems) and endsystem unavailability. In such large systems, a significant fraction of endsystems and their data will be unavailable at any given time. Existing methods to provide high data availability despite endsystem unavailability involve centralizing, redistributing or replicating the data. At large scale these methods are not scalable.
We advocate a design that trades query delay for completeness, incrementally returning results as endsystems become available. We also introduce the idea of completeness prediction, which provides the user with explicit feedback about this delay/completeness trade-off. Completeness prediction is based on replication of compact data summaries and availability models. This metadata is orders of magnitude smaller than the data. Seaweed is a scalable query infrastructure supporting incremental results, online in-network aggregation and completeness prediction. It is built on a distributed hash table (DHT) but unlike previous DHT based approaches it does not redistribute data across the network. It exploits the DHT infrastructure for failure-resilient metadata replication, query dissemination, and result aggregation. We analytically compare Seaweed's scalability against other approaches and also evaluate the Seaweed prototype running on a large-scale network simulator driven by real-world traces.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Bernstein:2008:IMC,
  author =       "Philip A. Bernstein and Todd J. Green and Sergey Melnik and Alan Nash",
  title =        "Implementing mapping composition",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "333--353",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Mapping composition is a fundamental operation in metadata driven applications. Given a mapping over schemas $ \sigma_1 $ and $ \sigma_2 $ and a mapping over schemas $ \sigma_2 $ and $ \sigma_3 $, the composition problem is to compute an equivalent mapping over $ \sigma_1 $ and $ \sigma_3 $.
We describe a new composition algorithm that targets practical applications. It incorporates view unfolding. It eliminates as many $ \sigma_2 $ symbols as possible, even if not all can be eliminated. It covers constraints expressed using arbitrary monotone relational operators and, to a lesser extent, non-monotone operators. And it introduces the new technique of left composition. We describe our implementation, explain how to extend it to support user-defined operators, and present experimental results which validate its effectiveness.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "mapping composition; model management; schema mappings",
}

@Article{Li:2008:ESF,
  author =       "Yunyao Li and Cong Yu and H. V. Jagadish",
  title =        "Enabling {Schema-Free XQuery} with meaningful query focus",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "355--377",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The widespread adoption of XML holds the promise that document structure can be exploited to specify precise database queries. However, users may have only a limited knowledge of the XML structure, and may be unable to produce a correct XQuery expression, especially in the context of a heterogeneous information collection. The default is to use keyword-based search and we are all too familiar with how difficult it is to obtain precise answers by these means. We seek to address these problems by introducing the notion of Meaningful Query Focus (MQF) for finding related nodes within an XML document.
MQF enables users to take full advantage of the preciseness and efficiency of XQuery without requiring (perfect) knowledge of the document structure. Such a Schema-Free XQuery is potentially of value not just to casual users with partial knowledge of schema, but also to experts working in data integration or data evolution. In such a context, a schema-free query, once written, can be applied universally to multiple data sources that supply similar content under different schemas, and applied `forever' as these schemas evolve. Our experimental evaluation found that it is possible to express a wide variety of queries in a schema-free manner and efficiently retrieve correct results over a broad diversity of schemas. Furthermore, the evaluation of a schema-free query is not expensive: using a novel stack-based algorithm we developed for computing MQF, the overhead is from 1 to 4 times the execution time of an equivalent schema-aware query. The evaluation cost of schema-free queries can be further reduced by as much as 68\% using a selectivity-based algorithm we develop to enable the integration of MQF operation into the query pipeline.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "hierarchical; query language; schema; semi-structured; XML; XQuery",
}

@Article{Yiu:2008:BTI,
  author =       "Man Lung Yiu and Yufei Tao and Nikos Mamoulis",
  title =        "The {Bdual-Tree}: indexing moving objects by space filling curves in the dual space",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "379--400",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Existing spatiotemporal indexes suffer from either large update cost or
poor query performance, except for the $ B^x $-tree (the state-of-the-art), which consists of multiple $ B^+ $-trees indexing the 1D values transformed from the (multi-dimensional) moving objects based on a space filling curve (Hilbert, in particular). This curve, however, does not consider object velocities, and as a result, query processing with a $ B^x $-tree retrieves a large number of false hits, which seriously compromises its efficiency. It is natural to wonder `can we obtain better performance by capturing also the velocity information, using a Hilbert curve of a higher dimensionality?'. This paper provides a positive answer by developing the $ B^{\rm dual} $-tree, a novel spatiotemporal access method leveraging pure relational methodology. We show, with theoretical evidence, that the $ B^{\rm dual} $-tree indeed outperforms the $ B^x $-tree in most circumstances. Furthermore, our technique can effectively answer progressive spatiotemporal queries, which are poorly supported by $ B^x $-trees.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "access method; space filling curve; spatiotemporal",
}

@Article{Awad:2008:PWS,
  author =       "Mamoun Awad and Latifur Khan and Bhavani Thuraisingham",
  title =        "Predicting {WWW} surfing using multiple evidence combination",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "401--417",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The improvement of many applications such as web search, latency reduction, and personalization/ recommendation systems depends on surfing prediction. Predicting user surfing paths involves tradeoffs between model complexity and predictive accuracy.
In this paper, we combine two classification techniques, namely, the Markov model and Support Vector Machines (SVM), to resolve prediction using Dempster's rule. Such fusion overcomes the inability of the Markov model in predicting the unseen data as well as overcoming the problem of multiclassification in the case of SVM, especially when dealing with large number of classes. We apply feature extraction to increase the power of discrimination of SVM. In addition, during prediction we employ domain knowledge to reduce the number of classifiers for the improvement of accuracy and the reduction of prediction time. We demonstrate the effectiveness of our hybrid approach by comparing our results with widely used techniques, namely, SVM, the Markov model, and association rule mining.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Wang:2008:HBM,
  author =       "Hai Wang and Kenneth C. Sevcik",
  title =        "Histograms based on the minimum description length principle",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "419--442",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Histograms have been widely used for selectivity estimation in query optimization, as well as for fast approximate query answering in many OLAP, data mining, and data visualization applications. This paper presents a new family of histograms, the Hierarchical Model Fitting (HMF) histograms, based on the Minimum Description Length principle. Rather than having each bucket of a histogram described by the same type of model, the HMF histograms employ a local optimal model for each bucket.
The improved effectiveness of the locally chosen models offsets more than the overhead of keeping track of the representation of each individual bucket. Through a set of experiments, we show that the HMF histograms are capable of providing more accurate approximations than previously proposed techniques for many real and synthetic data sets across a variety of query workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "approximate query answering; data summarization; histograms; query processing",
}

@Article{Deligiannakis:2008:BCQ,
  author =       "Antonios Deligiannakis and Yannis Kotidis and Nick Roussopoulos",
  title =        "Bandwidth-constrained queries in sensor networks",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "443--467",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Sensor networks consist of battery-powered wireless devices that are required to operate unattended for long periods of time. Thus, reducing energy drain is of utmost importance when designing algorithms and applications for such networks. Aggregate queries are often used by monitoring applications to assess the status of the network and detect abnormal behavior. Since radio transmission often constitutes the biggest factor of energy drain in a node, in this paper we propose novel algorithms for the evaluation of bandwidth-constrained queries over sensor networks.
The goal of our techniques is, given a target bandwidth utilization factor, to program the sensor nodes in a way that seeks to maximize the accuracy of the produced query results at the monitoring node, while always providing strong error guarantees to the monitoring application. This is a distinct difference of our framework from previous techniques that only provide probabilistic guarantees on the accuracy of the query result. Our algorithms are equally applicable when the nodes have ample power resources, but bandwidth consumption needs to be minimized, for instance in densely distributed networks, to ensure proper operation of the nodes. Our experiments with real sensor data show that bandwidth-constrained queries can substantially reduce the number of messages in the network while providing very tight error bounds on the query result.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "in-network aggregation; sensor networks",
}

@Article{Hammad:2008:QPM,
  author =       "Moustafa A. Hammad and Walid G. Aref and Ahmed K. Elmagarmid",
  title =        "Query processing of multi-way stream window joins",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "469--488",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper introduces a class of join algorithms, termed W-join, for joining multiple infinite data streams. W-join addresses the infinite nature of the data streams by joining stream data items that lie within a sliding window and that match a certain join condition.
In addition to its general applicability in stream query processing, W-join can be used to track the motion of a moving object or detect the propagation of clouds of hazardous material or pollution spills over time in a sensor network environment. We describe two new algorithms for W-join and address variations and local/global optimizations related to specifying the nature of the window constraints to fulfill the posed queries. The performance of the proposed algorithms is studied experimentally in a prototype stream database system, using synthetic data streams and real time-series data. Tradeoffs of the proposed algorithms and their advantages and disadvantages are highlighted, given variations in the aggregate arrival rates of the input data streams and the desired response times per query.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "multi-way window join; stream query processing", } @Article{Luo:2008:FBP, author = "Qiong Luo and Jeffrey F. Naughton and Wenwei Xue", title = "Form-based proxy caching for database-backed web sites: keywords and functions", journal = j-VLDB-J, volume = "17", number = "3", pages = "489--513", month = may, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Web caching proxy servers are essential for improving web performance and scalability, and recent research has focused on making proxy caching work for database-backed web sites. In this paper, we explore a new proxy caching framework that exploits the query semantics of HTML forms. We identify two common classes of form-based queries from real-world database-backed web sites, namely, keyword-based queries and function-embedded queries. 
Using typical examples of these queries, we study two representative caching schemes within our framework: (i) traditional passive query caching, and (ii) active query caching, in which the proxy cache can service a request by evaluating a query over the contents of the cache. Results from our experimental implementation show that our form-based proxy is a general and flexible approach that efficiently enables active caching schemes for database-backed web sites. Furthermore, handling query containment at the proxy yields significant performance advantages over passive query caching, but extending the power of the active cache to do full semantic caching appears to be less generally effective.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "database-backed Web sites; Web proxy caching", } @Article{Wang:2008:EAM, author = "Yida Wang and Ee-Peng Lim and San-Yih Hwang", title = "Efficient algorithms for mining maximal valid groups", journal = j-VLDB-J, volume = "17", number = "3", pages = "515--535", month = may, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A valid group is defined as a group of moving users that are within a distance threshold from one another for at least a minimum time duration. Unlike grouping of users determined by traditional clustering algorithms, members of a valid group are expected to stay close to one another during their movement. Each valid group suggests some social grouping that can be used in targeted marketing and social network analysis. 
The existing valid group mining algorithms are designed to mine a complete set of valid groups from time series of user location data, known as the user movement database. Unfortunately, there is considerable redundancy in the complete set of valid groups. In this paper, we therefore address this problem of mining the set of maximal valid groups. We first extend our previous valid group mining algorithms to mine maximal valid groups, leading to AMG and VGMax algorithms. We further propose the VGBK algorithm based on maximal clique enumeration to mine the maximal valid groups. The performance results of these algorithms under different sets of mining parameters are also reported.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yu:2008:DMW, author = "Qi Yu and Xumin Liu and Athman Bouguettaya and Brahim Medjahed", title = "Deploying and managing {Web} services: issues, solutions, and directions", journal = j-VLDB-J, volume = "17", number = "3", pages = "537--572", month = may, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Web services are expected to be the key technology in enabling the next installment of the Web in the form of the Service Web. In this paradigm shift, Web services would be treated as first-class objects that can be manipulated much like data is now manipulated using a database management system. Hitherto, Web services have largely been driven by standards. However, there is a strong impetus for defining a solid and integrated foundation that would facilitate the kind of innovations witnessed in other fields, such as databases.
This survey focuses on investigating the different research problems, solutions, and directions to deploying Web services that are managed by an integrated Web Service Management System (WSMS). The survey identifies the key features of a WSMS and conducts a comparative study on how current research approaches and projects fit in.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "interoperability; service-oriented computing; Web service management system", } @Article{Li:2008:EUD, author = "Changqing Li and Tok Wang Ling and Min Hu", title = "Efficient updates in dynamic {XML} data: from binary string to quaternary string", journal = j-VLDB-J, volume = "17", number = "3", pages = "573--601", month = may, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "XML query processing based on labeling schemes has been thoroughly studied in the past several years. Recently efficient processing of updates in dynamic XML data has gained more attention. However, all the existing techniques have high update cost, they cannot completely avoid re-labeling in XML updates, and they will increase the label size which will influence the query performance. Thus, in this paper we propose a novel Compact Dynamic Binary String (CDBS) encoding to efficiently process updates. 
CDBS has two important properties which form the foundations of this paper: (1) CDBS supports that CDBS codes can be inserted between any two consecutive CDBS codes with orders kept and without re-encoding the existing codes; (2) CDBS is orthogonal to specific labeling schemes; thus it can be applied broadly to different labeling schemes or other applications to efficiently process updates. Moreover, because CDBS will encounter the overflow problem, we improve CDBS to Compact Dynamic Quaternary String (CDQS) encoding which can completely avoid re-labeling in XML leaf node updates no matter what the labeling schemes are. Meanwhile, we also discuss how to efficiently process internal node updates. We report the experimental results to show that our CDBS and CDQS are superior to previous approaches to process both leaf node and internal node updates.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tao:2007:MRK, author = "Yufei Tao and Dimitris Papadias and Xiang Lian and Xiaokui Xiao", title = "Multidimensional reverse {kNN} search", journal = j-VLDB-J, volume = "16", number = "3", pages = "293--316", month = jul, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Given a multidimensional point $q$, a reverse $k$ nearest neighbor (RkNN) query retrieves all the data points that have $q$ as one of their $k$ nearest neighbors. Existing methods for processing such queries have at least one of the following deficiencies: they (i) do not support arbitrary values of $k$, (ii) cannot deal efficiently with database updates, (iii) are applicable only to 2D data but not to higher dimensionality, and (iv) retrieve only approximate results. 
Motivated by these shortcomings, we develop algorithms for exact RkNN processing with arbitrary values of $k$ on dynamic, multidimensional datasets. Our methods utilize a conventional data-partitioning index on the dataset and do not require any pre-computation. As a second step, we extend the proposed techniques to continuous RkNN search, which returns the RkNN results for every point on a line segment. We evaluate the effectiveness of our algorithms with extensive experiments using both real and synthetic datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "continuous search; reverse nearest neighbor; spatial database", } @Article{Koch:2007:AGS, author = "Christoph Koch and Stefanie Scherzinger", title = "Attribute grammars for scalable query processing on {XML} streams", journal = j-VLDB-J, volume = "16", number = "3", pages = "317--342", month = jul, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We introduce the notion of XML Stream Attribute Grammars (XSAGs). XSAGs are the first scalable query language for XML streams (running strictly in linear time with bounded memory consumption independent of the size of the stream) that allows for actual data transformations rather than just document filtering. XSAGs are also relatively easy to use for humans. Moreover, the XSAG formalism provides a strong intuition for which queries can or cannot be processed scalably on streams. 
We introduce XSAGs together with the necessary language-theoretic machinery, study their theoretical properties such as expressiveness and complexity, and discuss their implementation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "attribute grammars; query languages; stream processing; XML", } @Article{Chan:2007:OES, author = "Edward P. Chan and Heechul Lim", title = "Optimization and evaluation of shortest path queries", journal = j-VLDB-J, volume = "16", number = "3", pages = "343--369", month = jul, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We investigate the problem of how to evaluate efficiently a collection of shortest path queries on massive graphs that are too big to fit in the main memory. To evaluate a shortest path query efficiently, we introduce two pruning algorithms. These algorithms differ on the extent of materialization of shortest path cost and on how the search space is pruned. By grouping shortest path queries properly, batch processing improves the performance of shortest path query evaluation. Extensive study is also done on fragment sizes, cache sizes and query types that we show that affect the performance of a disk-based shortest path algorithm. The performance and scalability of proposed techniques are evaluated with large road systems in the Eastern United States. 
To demonstrate that the proposed disk-based algorithms are viable, we show that their search times are significantly better than that of main-memory Dijkstra's algorithm.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "disk-based algorithms; graph algorithms; graph pruning; query evaluation and optimization; route queries; shortest path queries", } @Article{Lee:2007:DPI, author = "Jae-Gil Lee and Kyu-Young Whang and Wook-Shin Han and Il-Yeol Song", title = "The dynamic predicate: integrating access control with query processing in {XML} databases", journal = j-VLDB-J, volume = "16", number = "3", pages = "371--387", month = jul, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Recently, access control on XML data has become an important research topic. Previous research on access control mechanisms for XML data has focused on increasing the efficiency of access control itself, but has not addressed the issue of integrating access control with query processing. In this paper, we propose an efficient access control mechanism tightly integrated with query processing for XML databases. We present the novel concept of the dynamic predicate (DP), which represents a dynamically constructed condition during query execution. A DP is derived from instance-level authorizations and constrains accessibility of the elements. The DP allows us to effectively integrate authorization checking into the query plan so that unauthorized elements are excluded in the process of query execution.
Experimental results show that the proposed access control mechanism improves query processing time significantly over the state-of-the-art access control mechanisms. We conclude that the DP is highly effective in efficiently checking instance-level authorizations in databases with hierarchical structures.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access control; privacy/security; query processing; XML databases", } @Article{Papazoglou:2007:SOA, author = "Mike P. Papazoglou and Willem-Jan van den Heuvel", title = "Service oriented architectures: approaches, technologies and research issues", journal = j-VLDB-J, volume = "16", number = "3", pages = "389--415", month = jul, year = "2007", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Service-oriented architectures (SOA) is an emerging approach that addresses the requirements of loosely coupled, standards-based, and protocol-independent distributed computing. Typically business operations running in an SOA comprise a number of invocations of these different components, often in an event-driven or asynchronous fashion that reflects the underlying business process needs. To build an SOA a highly distributable communications and integration backbone is required. This functionality is provided by the Enterprise Service Bus (ESB) that is an integration platform that utilizes Web services standards to support a wide variety of communications patterns over multiple transport protocols and deliver value-added capabilities for SOA applications. This paper reviews technologies and approaches that unify the principles and concepts of SOA with those of event-based programming.
The paper also focuses on the ESB and describes a range of functions that are designed to offer a manageable, standards-based SOA backbone that extends middleware functionality throughout by connecting heterogeneous components and systems and offers integration services. Finally, the paper proposes an approach to extend the conventional SOA to cater for essential ESB requirements that include capabilities such as service orchestration, `intelligent' routing, provisioning, integrity and security of message as well as service management. The layers in this extended SOA, in short xSOA, are used to classify research issues and current research activities.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "application and service integration; asynchronous and event-driven processing; enterprise bus; service oriented architecture; Web services", } @Article{Byun:2008:PBA, author = "Ji-Won Byun and Ninghui Li", title = "Purpose based access control for privacy protection in relational database systems", journal = j-VLDB-J, volume = "17", number = "4", pages = "603--619", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this article, we present a comprehensive approach for privacy preserving access control based on the notion of purpose. In our model, purpose information associated with a given data element specifies the intended use of the data element. A key feature of our model is that it allows multiple purposes to be associated with each data element and also supports explicit prohibitions, thus allowing privacy officers to specify that some data should not be used for certain purposes. 
An important issue addressed in this article is the granularity of data labeling, i.e., the units of data with which purposes can be associated. We address this issue in the context of relational databases and propose four different labeling schemes, each providing a different granularity. We also propose an approach to represent purpose information, which results in low storage overhead, and we exploit query modification techniques to support access control based on purpose information. Another contribution of our work is that we address the problem of how to determine the purpose for which certain data are accessed by a given user. Our proposed solution relies on role-based access control (RBAC) models as well as the notion of conditional role which is based on the notions of role attribute and system attribute.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "access control; privacy; private data management; purpose", } @Article{Karayannidis:2008:HCO, author = "Nikos Karayannidis and Timos Sellis", title = "Hierarchical clustering for {OLAP}: the {CUBE File} approach", journal = j-VLDB-J, volume = "17", number = "4", pages = "621--655", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper deals with the problem of physical clustering of multidimensional data that are organized in hierarchies on disk in a hierarchy-preserving manner. This is called hierarchical clustering. A typical case, where hierarchical clustering is necessary for reducing I/Os during query evaluation, is the most detailed data of an OLAP cube. 
The presence of hierarchies in the multidimensional space results in an enormous search space for this problem. We propose a representation of the data space that results in a chunk-tree representation of the cube. The model is adaptive to the cube's extensive sparseness and provides efficient access to subsets of data based on hierarchy value combinations. Based on this representation of the search space we formulate the problem as a chunk-to-bucket allocation problem, which is a packing problem as opposed to the linear ordering approach followed in the literature.\par We propose a metric to evaluate the quality of hierarchical clustering achieved (i.e., evaluate the solutions to the problem) and formulate the problem as an optimization problem. We prove its NP-Hardness and provide an effective solution based on a linear time greedy algorithm. The solution of this problem leads to the construction of the CUBE File data structure. We analyze in depth all steps of the construction and provide solutions for interesting sub-problems arising, such as the formation of bucket-regions, the storage of large data chunks and the caching of the upper nodes (root directory) in main memory.\par Finally, we provide an extensive experimental evaluation of the CUBE File's adaptability to the data space sparseness as well as to an increasing number of data points. The main result is that the CUBE File is highly adaptive to even the most sparse data spaces and for realistic cases of data point cardinalities provides hierarchical clustering of high quality and significant space savings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "CUBE File; data cube; hierarchical clustering; OLAP; Physical data clustering", } @Article{Plattner:2008:EDS, author = "Christian Plattner and Gustavo Alonso and M. 
Tamer {\"O}zsu", title = "Extending {DBMSs} with satellite databases", journal = j-VLDB-J, volume = "17", number = "4", pages = "657--682", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we propose an extensible architecture for database engines where satellite databases are used to scale out and implement additional functionality for a centralized database engine. The architecture uses a middleware layer that offers consistent views and a single system image over a cluster of machines with database engines. One of these engines acts as a master copy while the others are read-only snapshots which we call satellites. The satellites are lightweight DBMSs used for scalability and to provide functionality difficult or expensive to implement in the main engine. Our approach also supports the dynamic creation of satellites to be able to autonomously adapt to varying loads. The paper presents the architecture, discusses the research problems it raises, and validates its feasibility with extensive experimental results.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "dynamic satellite creation; extending database functionality; satellite databases; snapshot isolation", } @Article{Hsieh:2008:DEF, author = "Ming-Jyh Hsieh and Wei-Guang Teng and Ming-Syan Chen and Philip S. 
Yu", title = "{DAWN}: an efficient framework of {DCT} for data with error estimation", journal = j-VLDB-J, volume = "17", number = "4", pages = "683--702", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "On-line analytical processing (OLAP) has become an important component in most data warehouse systems and decision support systems in recent years. In order to deal with the huge amount of data, highly complex queries and increasingly strict response time requirements, approximate query processing has been deemed a viable solution. Most works in this area, however, focus on the space efficiency and are unable to provide quality-guaranteed answers to queries. To remedy this, in this paper, we propose an efficient framework of DCT for dAta With error estimatioN, called DAWN, which focuses on answering range-sum queries from compressed OP-cubes transformed by DCT. Specifically, utilizing the techniques of Geometric series and Euler's formula, we devise a robust summation function, called the GE function, to answer range queries in constant time, regardless of the number of data cells involved. Note that the GE function can estimate the summation of cosine functions precisely; thus the quality of the answers is superior to that of previous works. Furthermore, an estimator of errors based on the Brown noise assumption (BNA) is devised to provide tight bounds for answering range-sum queries. Our experiment results show that the DAWN framework is scalable to the selectivity of queries and the available storage space. 
With GE functions and the BNA method, the DAWN framework not only delivers high quality answers for range-sum queries, but also leads to shorter query response time due to its effectiveness in error estimation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Atzori:2008:APP, author = "Maurizio Atzori and Francesco Bonchi and Fosca Giannotti and Dino Pedreschi", title = "Anonymity preserving pattern discovery", journal = j-VLDB-J, volume = "17", number = "4", pages = "703--727", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "It is generally believed that data mining results do not violate the anonymity of the individuals recorded in the source database. In fact, data mining models and patterns, in order to ensure a required statistical significance, represent a large number of individuals and thus conceal individual identities: this is the case of the minimum support threshold in frequent pattern mining. In this paper we show that this belief is ill-founded. By shifting the concept of $k$-anonymity from the source data to the extracted patterns, we formally characterize the notion of a threat to anonymity in the context of pattern discovery, and provide a methodology to efficiently and effectively identify all such possible threats that arise from the disclosure of the set of extracted patterns. On this basis, we obtain a formal notion of privacy protection that allows the disclosure of the extracted knowledge while protecting the anonymity of the individuals in the source database. 
Moreover, in order to handle the cases where the threats to anonymity cannot be avoided, we study how to eliminate such threats by means of pattern (not data!) distortion performed in a controlled way.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "anonymity; frequent pattern mining; individual privacy; knowledge discovery; privacy preserving data mining", } @Article{Morfonios:2008:SDC, author = "Konstantinos Morfonios and Yannis Ioannidis", title = "Supporting the data cube lifecycle: the power of {ROLAP}", journal = j-VLDB-J, volume = "17", number = "4", pages = "729--764", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The lifecycle of a data cube involves efficient construction and storage, fast query answering, and incremental updating. Existing ROLAP methods that implement data cubes are weak with respect to one or more of the above, focusing mainly on construction and storage. In this paper, we present a comprehensive ROLAP solution that addresses efficiently all functionality in the lifecycle of a cube and can be implemented easily over existing relational servers. It is a family of algorithms centered around a purely ROLAP construction method that provides fast computation of a fully materialized cube in compressed form, is incrementally updatable, and exhibits quick query response times that can be improved by low-cost indexing and caching. 
This is demonstrated through comprehensive experiments on both synthetic and real-world datasets, whose results have shown great promise for the performance and scalability potential of the proposed techniques, with respect to both the size and dimensionality of the fact table.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "caching; compressed storage; data cube; incremental updating; indexing; query processing; ROLAP", } @Article{Sharifzadeh:2008:OSR, author = "Mehdi Sharifzadeh and Mohammad Kolahdouzan and Cyrus Shahabi", title = "The optimal sequenced route query", journal = j-VLDB-J, volume = "17", number = "4", pages = "765--787", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Real-world road-planning applications often result in the formulation of new variations of the nearest neighbor (NN) problem requiring new solutions. In this paper, we study an unexplored form of NN queries named optimal sequenced route (OSR) query in both vector and metric spaces. OSR strives to find a route of minimum length starting from a given source location and passing through a number of typed locations in a particular order imposed on the types of the locations. We first transform the OSR problem into a shortest path problem on a large planar graph. We show that a classic shortest path algorithm such as Dijkstra's is impractical for most real-world scenarios. Therefore, we propose LORD, a light threshold-based iterative algorithm, which utilizes various thresholds to prune the locations that cannot belong to the optimal route. 
Then we propose R-LORD, an extension of LORD which uses R-tree to examine the threshold values more efficiently. Finally, for applications that cannot tolerate the Euclidean distance as estimation and require exact distance measures in metric spaces (e.g., road networks) we propose PNE that progressively issues NN queries on different point types to construct the optimal route for the OSR query. Our extensive experiments on both real-world and synthetic datasets verify that our algorithms significantly outperform a disk-based variation of the Dijkstra approach in terms of processing time (up to two orders of magnitude) and required workspace (up to 90\% reduction on average).", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "nearest neighbor search; spatial databases; trip planning queries", } @Article{Friedman:2008:PAD, author = "Arik Friedman and Ran Wolff and Assaf Schuster", title = "Providing $k$-anonymity in data mining", journal = j-VLDB-J, volume = "17", number = "4", pages = "789--804", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper we present extended definitions of $k$-anonymity and use them to prove that a given data mining model does not violate the $k$-anonymity of the individuals represented in the learning examples. Our extension provides a tool that measures the amount of anonymity retained during data mining. We show that our model can be applied to various data mining problems, such as classification, association rule mining and clustering. 
We describe two data mining algorithms which exploit our extension to guarantee they will generate only $k$-anonymous output, and provide experimental results for one of them. Finally, we show that our method contributes new and efficient ways to anonymize data and preserve patterns during anonymization.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Harder:2008:VCC, author = "Theo H{\"a}rder and Andreas B{\"u}hmann", title = "Value complete, column complete, predicate complete", journal = j-VLDB-J, volume = "17", number = "4", pages = "805--826", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Caching is a proven remedy to enhance scalability and availability of software systems as well as to reduce latency of user requests. In contrast to Web caching where single Web objects are accessed and kept ready somewhere in caches in the user-to-server path, database caching uses full-fledged database management systems as caches, close to application servers at the edge of the Web, to adaptively maintain sets of records from a remote database and to evaluate queries on them. We analyze a new class of approaches to database caching where the extensions of query predicates that are to be evaluated are constructed by constraints in the cache. Starting from the key concept of value completeness, we explore the application of cache constraints and their implications on query evaluation correctness and on controllable cache loading called cache safeness. Furthermore, we identify simple rules for the design of cache groups and their optimization before discussing the use of single cache groups and cache group federations. 
Finally, we argue that predicate completeness can be used to develop new variants of constraint-based database caching.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "cache constraints; database caching; predicate completeness; query processing", } @Article{Ou:2008:EAI, author = "Jian-Chih Ou and Chang-Hung Lee and Ming-Syan Chen", title = "Efficient algorithms for incremental {Web} log mining with dynamic thresholds", journal = j-VLDB-J, volume = "17", number = "4", pages = "827--845", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the fast increase in Web activities, Web data mining has recently become an important research topic and is receiving a significant amount of interest from both academic and industrial environments. While existing methods are efficient for the mining of frequent path traversal patterns from the access information contained in a log file, these approaches are likely to over evaluate associations. Explicitly, most previous studies of mining path traversal patterns are based on the model of a uniform support threshold, where a single support threshold is used to determine frequent traversal patterns without taking into consideration such important factors as the length of a pattern, the positions of Web pages, and the importance of a particular pattern, etc. As a result, a low support threshold will lead to lots of uninteresting patterns derived whereas a high support threshold may cause some interesting patterns with lower supports to be ignored. 
In view of this, this paper broadens the horizon of frequent path traversal pattern mining by introducing a flexible model of mining Web traversal patterns with dynamic thresholds. Specifically, we study and apply the Markov chain model to provide the determination of support threshold of Web documents; and further, by properly employing some effective techniques devised for joining reference sequences, the proposed algorithm dynamic threshold miner (DTM) not only possesses the capability of mining with dynamic thresholds, but also significantly improves the execution efficiency as well as contributes to the incremental mining of Web traversal patterns. Performance of algorithm DTM and the extension of existing methods is comparatively analyzed with synthetic and real Web logs. It is shown that the option of algorithm DTM is very advantageous in reducing the number of unnecessary rules produced and leads to prominent performance improvement.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "dynamic support threshold; path traversal pattern; Web mining", } @Article{Alagic:2008:GJP, author = "Suad Alagi{\'c} and Mark Royer", title = "Genericity in {Java}: persistent and database systems implications", journal = j-VLDB-J, volume = "17", number = "4", pages = "847--878", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Lack of parametric polymorphism has been a major obstacle for making Java a viable database programming language.
Regrettably, a recently accepted solution for genericity in Java 5.0 has far-reaching negative implications for persistent and database systems because of static and dynamic type violations. Severe implications occur in typical database transactions when processing a variety of database collections. Well-known approaches to persistence in Java, including Java's own persistence mechanism, do not perform correctly due to incorrect dynamic type information that gets promoted to persistence along with objects. Dynamic checking of types of objects fetched from the persistent store may now lead to unexpected type violations. Further problems occur in reflective transactions as Java Core Reflection now allows dynamic type violations without detecting them or throwing standard exceptions. All of this shows that extending Java with parametric polymorphism has not made Java a more viable database programming language. Both legacy systems, such as those based on the Java binding of the ODMG or JDO, and future Java-related persistent and database technologies will be affected. The source of these problems is in an implementation idiom called type erasure. This paper provides formal proofs of the above implications of type erasure along with specific samples of code in Java 5.0 illustrating these violations. 
The limitations of the virtual platform and extensions required for persistent systems to solve this problem are also elaborated.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Java; object persistence; object-oriented databases; reflection; transactions; type systems; virtual platforms", } @Article{Vaidya:2008:PPN, author = "Jaideep Vaidya and Murat Kantarc{\i}o{\u{g}}lu and Chris Clifton", title = "Privacy-preserving {Na{\"\i}ve Bayes} classification", journal = j-VLDB-J, volume = "17", number = "4", pages = "879--898", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Privacy-preserving data mining --- developing models without seeing the data --- is receiving growing attention. This paper assumes a privacy-preserving distributed data mining scenario: data sources collaborate to develop a global model, but must not disclose their data to others. The problem of secure distributed classification is an important one. In many situations, data is split between multiple organizations. These organizations may want to utilize all of the data to create more accurate predictive models while revealing neither their training data/databases nor the instances to be classified. Na{\"\i}ve Bayes is often used as a baseline classifier, consistently providing reasonable classification performance.
This paper brings privacy-preservation to that baseline, presenting protocols to develop a Na{\"\i}ve Bayes classifier on both vertically as well as horizontally partitioned data.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Data mining; Distributed computing; Na{\"\i}ve Bayes; Privacy; Security", } @Article{Fu:2008:STW, author = "Ada Wai-Chee Fu and Eamonn Keogh and Leo Yung Lau and Chotirat Ann Ratanamahatana and Raymond Chi-Wing Wong", title = "Scaling and time warping in time series querying", journal = j-VLDB-J, volume = "17", number = "4", pages = "899--921", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The last few years have seen an increasing understanding that dynamic time warping (DTW), a technique that allows local flexibility in aligning time series, is superior to the ubiquitous Euclidean distance for time series classification, clustering, and indexing. More recently, it has been shown that for some problems, uniform scaling (US), a technique that allows global scaling of time series, may just be as important for some problems. In this work, we note that for many real world problems, it is necessary to combine both DTW and US to achieve meaningful results. This is particularly true in domains where we must account for the natural variability of human actions, including biometrics, query by humming, motion-capture/animation, and handwriting recognition. We introduce the first technique which can handle both DTW and US simultaneously, our techniques involve search pruning by means of a lower bounding technique and multi-dimensional indexing to speed up the search.
We demonstrate the utility and effectiveness of our method on a wide range of problems in industry, medicine, and entertainment.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "dynamic time warping; nearest neighbor search; scaled and warped matching; subsequence matching; uniform scaling", } @Article{Mouratidis:2008:TBP, author = "Kyriakos Mouratidis and Dimitris Papadias and Spiros Papadimitriou", title = "Tree-based partition querying: a methodology for computing medoids in large spatial datasets", journal = j-VLDB-J, volume = "17", number = "4", pages = "923--945", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Besides traditional domains (e.g., resource allocation, data mining applications), algorithms for medoid computation and related problems will play an important role in numerous emerging fields, such as location based services and sensor networks. Since the $k$-medoid problem is NP-hard, all existing work deals with approximate solutions on relatively small datasets. This paper aims at efficient methods for very large spatial databases, motivated by: (1) the high and ever increasing availability of spatial data, and (2) the need for novel query types and improved services. The proposed solutions exploit the intrinsic grouping properties of a data partition index in order to read only a small part of the dataset. Compared to previous approaches, we achieve results of comparable or better quality at a small fraction of the CPU and I/O costs (seconds as opposed to hours, and tens of node accesses instead of thousands). 
In addition, we study medoid-aggregate queries, where $k$ is not known in advance, but we are asked to compute a medoid set that leads to an average distance close to a user-specified value. Similarly, medoid-optimization queries aim at minimizing both the number of medoids $k$ and the average distance. We also consider the max version for the aforementioned problems, where the goal is to minimize the maximum (instead of the average) distance between any object and its closest medoid. Finally, we investigate bichromatic and weighted medoid versions for all query types, as well as, maximum capacity and dynamic medoids.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "medoid queries; query processing; spatial databases", } @Article{Yu:2008:DMP, author = "Jeffrey Xu Yu and Zhiheng Li and Guimei Liu", title = "A data mining proxy approach for efficient frequent itemset mining", journal = j-VLDB-J, volume = "17", number = "4", pages = "947--970", month = jul, year = "2008", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Data mining has attracted a lot of research efforts during the past decade. However, little work has been reported on the efficiency of supporting a large number of users who issue different data mining queries periodically when there are new needs and when data is updated. Our work is motivated by the fact that the pattern-growth method is one of the most efficient methods for frequent pattern mining which constructs an initial tree and mines frequent patterns on top of the tree. 
In this paper, we present a data mining proxy approach that can reduce the I/O costs to construct an initial tree by utilizing the trees that have already been resident in memory. The tree we construct is the smallest for a given data mining query. In addition, our proxy approach can also reduce CPU cost in mining patterns, because the cost of mining relies on the sizes of trees. The focus of the work is to construct an initial tree efficiently. We propose three tree operations to construct a tree. With a unique coding scheme, we can efficiently project subtrees from on-disk trees or in-memory trees. Our performance study indicated that the data mining proxy significantly reduces the I/O cost to construct trees and CPU cost to mine patterns over the trees constructed.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mokbel:2008:SSL, author = "Mohamed F. Mokbel and Walid G. Aref", title = "{SOLE}: scalable on-line execution of continuous queries on spatio-temporal data streams", journal = j-VLDB-J, volume = "17", number = "5", pages = "971--995", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0046-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper presents the scalable on-line execution (SOLE) algorithm for continuous and on-line evaluation of concurrent continuous spatio-temporal queries over data streams. Incoming spatio-temporal data streams are processed in-memory against a set of outstanding continuous queries. The SOLE algorithm utilizes the scarce memory resource efficiently by keeping track of only the significant objects. 
In-memory stored objects are expired (i.e., dropped) from memory once they become insignificant. SOLE is a scalable algorithm where all the continuous outstanding queries share the same buffer pool. In addition, SOLE is presented as a spatio-temporal join between two input streams, a stream of spatio-temporal objects and a stream of spatio-temporal queries. To cope with intervals of high arrival rates of objects and/or queries, SOLE utilizes a load-shedding approach where some of the stored objects are dropped from memory. SOLE is implemented as a pipelined query operator that can be combined with traditional query operators in a query execution plan to support a wide variety of continuous queries. Performance experiments based on a real implementation of SOLE inside a prototype of a data stream management system show the scalability and efficiency of SOLE in highly dynamic environments.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Pol:2008:MVL, author = "Abhijit Pol and Christopher Jermaine and Subramanian Arumugam", title = "Maintaining very large random samples using the geometric file", journal = j-VLDB-J, volume = "17", number = "5", pages = "997--1018", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0048-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Random sampling is one of the most fundamental data management tools available. However, most current research involving sampling considers the problem of how to use a sample, and not how to compute one. 
The implicit assumption is that a 'sample' is a small data structure that is easily maintained as new data are encountered, even though simple statistical arguments demonstrate that very large samples of gigabytes or terabytes in size can be necessary to provide high accuracy. No existing work tackles the problem of maintaining very large, disk-based samples from a data management perspective, and no techniques now exist for maintaining very large samples in an online manner from streaming data. In this paper, we present online algorithms for maintaining on-disk samples that are gigabytes or terabytes in size. The algorithms are designed for streaming data, or for any environment where a large sample must be maintained online in a single pass through a data set. The algorithms meet the strict requirement that the sample always be a true, statistically random sample (without replacement) of all of the data processed thus far. We also present algorithms to retrieve small size random sample from large disk-based sample which may be used for various purposes including statistical analyses by a DBMS.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Abiteboul:2008:AXP, author = "Serge Abiteboul and Omar Benjelloun and Tova Milo", title = "The {Active XML} project: an overview", journal = j-VLDB-J, volume = "17", number = "5", pages = "1019--1040", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0049-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper provides an overview of the Active XML project developed at INRIA over the past five years. 
Active XML (AXML, for short), is a declarative framework that harnesses Web services for distributed data management, and is put to work in a peer-to-peer architecture. The model is based on AXML documents, which are XML documents that may contain embedded calls to Web services, and on AXML services, which are Web services capable of exchanging AXML documents. An AXML peer is a repository of AXML documents that acts both as a client by invoking the embedded service calls, and as a server by providing AXML services, which are generally defined as queries or updates over the persistent AXML documents. The approach gracefully combines stored information with data defined in an intensional manner as well as dynamic information. This simple, rather classical idea leads to a number of technically challenging problems, both theoretical and practical. In this paper, we describe and motivate the AXML model and language, overview the research results obtained in the course of the project, and show how all the pieces come together in our implementation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Data exchange; Intensional information; Web services; XML", } @Article{Buccafurri:2008:EHT, author = "Francesco Buccafurri and Gianluca Lax and Domenico Sacc{\`a} and Luigi Pontieri and Domenico Rosaci", title = "Enhancing histograms by tree-like bucket indices", journal = j-VLDB-J, volume = "17", number = "5", pages = "1041--1061", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0050-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Histograms are used to summarize the contents of relations into a number of buckets for the 
estimation of query result sizes. Several techniques have been proposed in the past for determining bucket boundaries which provide accurate estimations. However, while search strategies for optimal bucket boundaries are rather sophisticated, no much attention has been paid for estimating queries inside buckets and all of the above techniques adopt naive methods for such an estimation. This paper focuses on the problem of improving the estimation inside a bucket once its boundaries have been fixed. The proposed technique is based on the addition, to each bucket, of a memory-word additional information (organized into a tree-like index), storing approximate cumulative frequencies in a hierarchical fashion. Both theoretical analysis and experimental results show that the proposed approach improves the accuracy of the estimation inside buckets, w.r.t. both classical approaches (like continuous value assumption and uniform spread assumption) and a number of alternative ways to organize the additional information. 
The index is later added to state-of-the-art histograms obtaining the non-obvious result that despite the spatial overhead which reduces the number of allowed buckets once the storage space has been fixed, the original methods are strongly improved in terms of accuracy.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Approximate OLAP; Histograms; Range query estimation", } @Article{Kamra:2008:DAA, author = "Ashish Kamra and Evimaria Terzi and Elisa Bertino", title = "Detecting anomalous access patterns in relational databases", journal = j-VLDB-J, volume = "17", number = "5", pages = "1063--1077", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0051-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A considerable effort has been recently devoted to the development of Database Management Systems (DBMS) which guarantee high assurance and security. An important component of any strong security solution is represented by Intrusion Detection (ID) techniques, able to detect anomalous behavior of applications and users. To date, however, there have been few ID mechanisms proposed which are specifically tailored to function within the DBMS. In this paper, we propose such a mechanism. Our approach is based on mining SQL queries stored in database audit log files. The result of the mining process is used to form profiles that can model normal database access behavior and identify intruders. We consider two different scenarios while addressing the problem. In the first case, we assume that the database has a Role Based Access Control (RBAC) model in place. 
Under a RBAC system permissions are associated with roles, grouping several users, rather than with single users. Our ID system is able to determine role intruders, that is, individuals while holding a specific role, behave differently than expected. An important advantage of providing an ID technique specifically tailored to RBAC databases is that it can help in protecting against insider threats. Furthermore, the existence of roles makes our approach usable even for databases with large user population. In the second scenario, we assume that there are no roles associated with users of the database. In this case, we look directly at the behavior of the users. We employ clustering algorithms to form concise profiles representing normal user behavior. For detection, we either use these clustered profiles as the roles or employ outlier detection techniques to identify behavior that deviates from the profiles. Our preliminary experimental evaluation on both real and synthetic database traces shows that our methods work well in practical situations.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Anomaly detection; DBMS; Intrusion detection; RBAC; User profiles", } @Article{Guha:2008:WSH, author = "Sudipto Guha and Hyoungmin Park and Kyuseok Shim", title = "Wavelet synopsis for hierarchical range queries with workloads", journal = j-VLDB-J, volume = "17", number = "5", pages = "1079--1099", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0052-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Synopses structures and approximate query answering have become increasingly important in DSS/OLAP applications with stringent
response time requirements. Range queries are an important class of problems in this domain, and have a wide variety of applications and have been studied in the context of histograms. However, wavelets have been shown to be quite useful in several scenarios and in fact their multi-resolution structure makes them especially appealing for hierarchical domains. Furthermore the fact that the Haar wavelet basis has a linear time algorithm for the computation of coefficients has made the Haar basis one of the important and widely used synopsis structures. Very recently optimal algorithms were proposed for the wavelet synopsis construction problem for equality/point queries. In this paper we investigate the problem of optimum Haar wavelet synopsis construction for range queries with workloads. We provide optimum algorithms as well as approximation heuristics and demonstrate the effectiveness of these algorithms with our extensive experimental evaluation using synthetic and real-life data sets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Approximate query answers; Data synopses; Query processing; Wavelet decomposition", } @Article{Deng:2008:MRS, author = "Ke Deng and Xiaofang Zhou and Heng Tao Shen and Qing Liu and Kai Xu and Xuemin Lin", title = "A multi-resolution surface distance model for {$k$-NN} query processing", journal = j-VLDB-J, volume = "17", number = "5", pages = "1101--1119", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0053-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A spatial k-NN query returns k nearest points in a point dataset to a given query point. 
To measure the distance between two points, most of the literature focuses on the Euclidean distance or the network distance. For many applications, such as wildlife movement, it is necessary to consider the surface distance, which is computed from the shortest path along a terrain surface. In this paper, we investigate the problem of efficient surface k-NN (sk-NN) query processing. This is an important yet highly challenging problem because the underlying environment data can be very large and the computational cost of finding the shortest path on a surface can be very high. To minimize the amount of surface data to be used and the cost of surface distance computation, a multi-resolution surface distance model is proposed in this paper to take advantage of monotonic distance changes when the distances are computed at different resolution levels. Based on this innovative model, sk-NN queries can be processed efficiently by accessing and processing surface data at a just-enough resolution level within a just-enough search region. 
Our extensive performance evaluations using real world datasets confirm the efficiency of our proposed model.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chuang:2008:PLR, author = "Kun-Ta Chuang and Jiun-Long Huang and Ming-Syan Chen", title = "Power-law relationship and self-similarity in the itemset support distribution: analysis and applications", journal = j-VLDB-J, volume = "17", number = "5", pages = "1121--1141", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0054-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we identify and explore that the power-law relationship and the self-similar phenomenon appear in the itemset support distribution. The itemset support distribution refers to the distribution of the count of itemsets versus their supports. Exploring the characteristics of these natural phenomena is useful to many applications such as providing the direction of tuning the performance of the frequent-itemset mining. However, due to the explosive number of itemsets, it is prohibitively expensive to retrieve lots of itemsets before we identify the characteristics of the itemset support distribution in targeted data. As such, we also propose a valid and cost-effective algorithm, called algorithm PPL, to extract characteristics of the itemset support distribution. Furthermore, to fully explore the advantages of our discovery, we also propose novel mechanisms with the help of PPL to solve two important problems: (1) determining a subtle parameter for mining approximate frequent itemsets over data streams; and (2) determining the sufficient sample size for mining frequent patterns. 
As validated in our experimental results, PPL can efficiently and precisely identify the characteristics of the itemset support distribution in various real data. In addition, empirical studies also demonstrate that our mechanisms for those two challenging problems are in orders of magnitude better than previous works, showing the prominent advantage of PPL to be an important pre-processing means for mining applications.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Padmanabhan:2008:SDR, author = "Prasanna Padmanabhan and Le Gruenwald and Anita Vallur and Mohammed Atiquzzaman", title = "A survey of data replication techniques for mobile ad hoc network databases", journal = j-VLDB-J, volume = "17", number = "5", pages = "1143--1164", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0055-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A mobile ad hoc network (MANET) is a network that allows mobile servers and clients to communicate in the absence of a fixed infrastructure. MANET is a fast growing area of research as it finds use in a variety of applications. In order to facilitate efficient data access and update, databases are deployed on MANETs. These databases that operate on MANETs are referred to as MANET databases. Since data availability in MANETs is affected by the mobility and power constraints of the servers and clients, data in MANETs are replicated. A number of data replication techniques have been proposed for MANET databases. This paper identifies issues involved in MANET data replication and attempts to classify existing MANET data replication techniques based on the issues they address. 
The attributes of the replication techniques are also tabulated to facilitate a feature comparison of the existing MANET data replication works. Parameters and performance metrics are also presented to measure the performance of MANET replication techniques. In addition, this paper also proposes criteria for selecting appropriate data replication techniques for various application requirements. Finally, the paper concludes with a discussion on future research directions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Data replication; Mobile ad hoc network databases; Mobile databases", } @Article{Zhong:2008:GPT, author = "Sheng Zhong and Zhiqiang Yang", title = "Guided perturbation: towards private and accurate mining", journal = j-VLDB-J, volume = "17", number = "5", pages = "1165--1177", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0056-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "There have been two methods for privacy-preserving data mining: the perturbation approach and the cryptographic approach. The perturbation approach is typically very efficient, but it suffers from a tradeoff between accuracy and privacy. In contrast, the cryptographic approach usually maintains accuracy, but it is more expensive in computation and communication overhead. We propose a novel perturbation method, called guided perturbation. Specifically, we focus on a central problem of privacy-preserving data mining--the secure scalar product problem of vertically partitioned data, and give a solution based on guided perturbation, with good, provable privacy guarantee. 
Our solution achieves accuracy comparable to the cryptographic solutions, while keeping the efficiency of perturbation solutions. Our experimental results show that it can be more than one hundred times faster than a typical cryptographic solution.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Rizzolo:2008:TXM, author = "Flavio Rizzolo and Alejandro A. Vaisman", title = "Temporal {XML}: modeling, indexing, and query processing", journal = j-VLDB-J, volume = "17", number = "5", pages = "1179--1212", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0058-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper we address the problem of modeling and implementing temporal data in XML. We propose a data model for tracking historical information in an XML document and for recovering the state of the document as of any given time. We study the temporal constraints imposed by the data model, and present algorithms for validating a temporal XML document against these constraints, along with methods for fixing inconsistent documents. In addition, we discuss different ways of mapping the abstract representation into a temporal XML document, and introduce TXPath, a temporal XML query language that extends XPath 2.0. In the second part of the paper, we present our approach for summarizing and indexing temporal XML documents. In particular we show that by indexing continuous paths, i.e., paths that are valid continuously during a certain interval in a temporal XML graph, we can dramatically increase query performance. 
To achieve this, we introduce a new class of summaries, denoted TSummary, that adds the time dimension to the well-known path summarization schemes. Within this framework, we present two new summaries: LCP and Interval summaries. The indexing scheme, denoted TempIndex, integrates these summaries with additional data structures. We give a query processing strategy based on TempIndex and a type of ancestor-descendant encoding, denoted temporal interval encoding. We present a persistent implementation of TempIndex, and a comparison against a system based on a non-temporal path index, and one based on DOM. Finally, we sketch a language for updates, and show that the cost of updating the index is compatible with real-world requirements.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Semistructured data; Structural summaries; Temporal databases; XML; XPath", } @Article{Jin:2008:SES, author = "Liang Jin and Chen Li and Rares Vernica", title = "{SEPIA}: estimating selectivities of approximate string predicates in large databases", journal = j-VLDB-J, volume = "17", number = "5", pages = "1213--1229", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0061-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Many database applications have the emerging need to support approximate queries that ask for strings that are similar to a given string, such as 'name similar to smith' and 'telephone number similar to 412-0964'. Query optimization needs the selectivity of such an approximate predicate, i.e., the fraction of records in the database that satisfy the condition. 
In this paper, we study the problem of estimating selectivities of approximate string predicates. We develop a novel technique, called Sepia, to solve the problem. Given a bag of strings, our technique groups the strings into clusters, builds a histogram structure for each cluster, and constructs a global histogram. It is based on the following intuition: given a query string $q$, a preselected string $p$ in a cluster, and a string $s$ in the cluster, based on the proximity between $q$ and $p$, and the proximity between $p$ and $s$, we can obtain a probability distribution from a global histogram about the similarity between $q$ and $s$. We give a full specification of the technique using the edit distance metric. We study challenges in adopting this technique, including how to construct the histogram structures, how to use them to do selectivity estimation, and how to alleviate the effect of non-uniform errors in the estimation. We discuss how to extend the techniques to other similarity functions. 
Our extensive experiments on real data sets show that this technique can accurately estimate selectivities of approximate string predicates.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Approximate; Estimation; Selectivity; SEPIA; String", } @Article{Venkateswaran:2008:RBI, author = "Jayendra Venkateswaran and Tamer Kahveci and Christopher Jermaine and Deepak Lachwani", title = "Reference-based indexing for metric spaces with costly distance measures", journal = j-VLDB-J, volume = "17", number = "5", pages = "1231--1251", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0062-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We consider the problem of similarity search in databases with costly metric distance measures. Given limited main memory, our goal is to develop a reference-based index that reduces the number of comparisons in order to answer a query. The idea in reference-based indexing is to select a small set of reference objects that serve as a surrogate for the other objects in the database. We consider novel strategies for selection of references and assigning references to database objects. For dynamic databases with frequent updates, we propose two incremental versions of the selection algorithm. 
Our experimental results show that our selection and assignment methods far outperform competing methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Earth mover's distance; Edit distance; Metric measures; Reference-indexing", } @Article{Tao:2008:PDW, author = "Yufei Tao and Xiaokui Xiao", title = "Primal or dual: which promises faster spatiotemporal search?", journal = j-VLDB-J, volume = "17", number = "5", pages = "1253--1270", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0064-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The existing predictive spatiotemporal indexes can be classified into two categories, depending on whether they are based on the primal or dual methodology. Although we have gained considerable empirical knowledge about various access methods, currently there is only limited understanding on the theoretical characteristics of the two methodologies. In fact, the experimental results in different papers even contradict each other, regarding the relative superiority of the primal and dual techniques. This paper presents a careful study on the query performance of general primal and dual indexes, and reveals important insight into the behavior of each technique. In particular, we mathematically establish the conditions that determine the superiority of each methodology, and provide rigorous justification for well-known observations that have not been properly explained in the literature. 
Our analytical findings also resolve the contradiction in the experiments of previous work.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Moving objects; Range search; Spatial database; Theory", } @Article{Tao:2008:ETC, author = "Yufei Tao and Xiaokui Xiao", title = "Efficient temporal counting with bounded error", journal = j-VLDB-J, volume = "17", number = "5", pages = "1271--1292", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0066-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper studies aggregate search in transaction time databases. Specifically, each object in such a database can be modeled as a horizontal segment, whose $y$-projection is its search key, and its $x$-projection represents the period when the key was valid in history. Given a query timestamp $ q_t$ and a key range $ \vec {q_k}$, a count-query retrieves the number of objects that are alive at $ q_t$, and their keys fall in $ \vec {q_k}$. We provide a method that accurately answers such queries, with error less than $ \frac {1}{\varepsilon } + \varepsilon \cdot N_{\rm alive}(q_t)$, where $ N_{\rm alive}(q_t)$ is the number of objects alive at time $ q_t$, and $C$ is any constant in $ (0, 1]$. Denoting the disk page size as $B$, and $ n = C N / B$, our technique requires $ O(n)$ space, processes any query in $ O(\log_B n)$ time, and supports each update in $ O(\log_B n)$ amortized I/Os. 
As demonstrated by extensive experiments, the proposed solutions guarantee query results with extremely high precision (median relative error below 5\%), while consuming only a fraction of the space occupied by the existing approaches that promise precise results.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Aggregate search; Approximate query processing; Temporal database", } @Article{Islam:2008:ACB, author = "Aminul Islam and Diana Inkpen and Iluju Kiringa", title = "Applications of corpus-based semantic similarity and word segmentation to database schema matching", journal = j-VLDB-J, volume = "17", number = "5", pages = "1293--1320", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0067-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we present a method for database schema matching: the problem of identifying elements of two given schemas that correspond to each other. Schema matching is useful in e-commerce exchanges, in data integration/warehousing, and in semantic web applications. We first present two corpus-based methods: one method is for determining the semantic similarity of two target words and the other is for automatic word segmentation. Then we present a name-based element-level database schema matching method that exploits both the semantic similarity and the word segmentation methods. Our word similarity method uses pointwise mutual information (PMI) to sort lists of important neighbor words of two target words; the words which are common in both lists are selected and their PMI values are aggregated to calculate the relative similarity score. 
Our word segmentation method uses corpus type frequency information to choose the type with maximum length and frequency from 'desegmented' text. It also uses a modified forward--backward matching technique using maximum length frequency and entropy rate if any non-matching portions of the text exist. Finally, we exploit both the semantic similarity and the word segmentation methods in our proposed name-based element-level schema matching method. This method uses a single property (i.e., element name) for schema matching and nevertheless achieves a measure score that is comparable to the methods that use multiple properties (e.g., element name, text description, data instance, context description). Our schema matching method also uses normalized and modified versions of the longest common subsequence string matching algorithm with weight factors to allow for a balanced combination. We validate our methods with experimental studies, the results of which suggest that these methods can be a useful addition to the set of existing methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Corpus-based methods; Database schema matching; Semantic similarity; Word segmentation", } @Article{Chuang:2008:MTK, author = "Kun-Ta Chuang and Jiun-Long Huang and Ming-Syan Chen", title = "Mining top-k frequent patterns in the presence of the memory constraint", journal = j-VLDB-J, volume = "17", number = "5", pages = "1321--1344", month = aug, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0078-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 10 10:00:50 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We explore in this paper a practicably interesting mining task to retrieve top-$k$ (closed) itemsets 
in the presence of the memory constraint. Specifically, as opposed to most previous works that concentrate on improving the mining efficiency or on reducing the memory size by best effort, we first attempt to specify the available upper memory size that can be utilized by mining frequent itemsets. To comply with the upper bound of the memory consumption, two efficient algorithms, called MTK and MTK\_Close, are devised for mining frequent itemsets and closed itemsets, respectively, without specifying the subtle minimum support. Instead, users only need to give a more human-understandable parameter, namely the desired number of frequent (closed) itemsets $k$. In practice, it is quite challenging to constrain the memory consumption while also efficiently retrieving top-$k$ itemsets. To effectively achieve this, MTK and MTK\_Close are devised as level-wise search algorithms, where the number of candidates being generated-and-tested in each database scan will be limited. A novel search approach, called $ \delta $-stair search, is utilized in MTK and MTK\_Close to effectively assign the available memory for testing candidate itemsets with various itemset-lengths, which leads to a small number of required database scans. As demonstrated in the empirical study on real data and synthetic data, instead of only providing the flexibility of striking a compromise between the execution efficiency and the memory consumption, MTK and MTK\_Close can both achieve high efficiency and have a constrained memory bound, showing the prominent advantage to be practical algorithms of mining frequent patterns.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Catarci:2008:GES, author = "Tiziana Catarci and Ren{\'e}e J. 
Miller", title = "Guest editorial: special issue on metadata management", journal = j-VLDB-J, volume = "17", number = "6", pages = "1345--1346", month = nov, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0112-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 22 09:20:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Atzeni:2008:MIS, author = "Paolo Atzeni and Paolo Cappellari and Riccardo Torlone and Philip A. Bernstein and Giorgio Gianforme", title = "Model-independent schema translation", journal = j-VLDB-J, volume = "17", number = "6", pages = "1347--1370", month = nov, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0105-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 22 09:20:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We discuss a proposal for the implementation of the model management operator ModelGen, which translates schemas from one model to another, for example from object-oriented to SQL or from SQL to XML schema descriptions. The operator can be used to generate database wrappers (e.g., object-oriented or XML to relational), default user interfaces (e.g., relational to forms), or default database schemas from other representations. The approach translates schemas from a model to another, within a predefined, but large and extensible, set of models: given a source schema S expressed in a source model, and a target model TM, it generates a schema $ S' $ expressed in TM that is 'equivalent' to $S$. 
A wide family of models is handled by using a metamodel in which models can be succinctly and precisely described. The approach expresses the translation as Datalog rules and exposes the source and target of the translation in a generic relational dictionary. This makes the translation transparent, easy to customize and model-independent. The proposal includes automatic generation of translations as composition of basic steps.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Data models; Model management; Schema translation", } @Article{Cudre-Mauroux:2008:PMM, author = "Philippe Cudr{\'e}-Mauroux and Adriana Budura and Manfred Hauswirth and Karl Aberer", title = "{PicShark}: mitigating metadata scarcity through large-scale {P2P} collaboration", journal = j-VLDB-J, volume = "17", number = "6", pages = "1371--1384", month = nov, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0103-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 22 09:20:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the commoditization of digital devices, personal information and media sharing is becoming a key application on the pervasive Web. In such a context, data annotation rather than data production is the main bottleneck. Metadata scarcity represents a major obstacle preventing efficient information processing in large and heterogeneous communities. However, social communities also open the door to new possibilities for addressing local metadata scarcity by taking advantage of global collections of resources. We propose to tackle the lack of metadata in large-scale distributed systems through a collaborative process leveraging on both content and metadata. 
We develop a community-based and self-organizing system called PicShark in which information entropy--in terms of missing metadata--is gradually alleviated through decentralized instance and schema matching. Our approach focuses on semi-structured metadata and confines computationally expensive operations to the edge of the network, while keeping distributed operations as simple as possible to ensure scalability. PicShark builds on structured Peer-to-Peer networks for distributed look-up operations, but extends the application of self-organization principles to the propagation of metadata and the creation of schema mappings. We demonstrate the practical applicability of our method in an image sharing scenario and provide experimental evidences illustrating the validity of our approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Metadata entropy; Metadata heterogeneity; Metadata scarcity; Peer data management; Peer-to-Peer collaboration", } @Article{Cruz:2008:LFS, author = "Isabel F. Cruz and Huiyong Xiao", title = "A layered framework supporting personal information integration and application design for the semantic desktop", journal = j-VLDB-J, volume = "17", number = "6", pages = "1385--1406", month = nov, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0102-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 22 09:20:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the development of inexpensive storage devices, space usage is no longer a bottleneck for computer users. 
However, the increasingly large amount of personal information poses a critical problem to those users: traditional file organization in hierarchical directories may not be suited to the effective management of personal information because it ignores the semantic associations therein and bears no connection with the applications that users will run. To address such limitations, we present our vision of a semantic desktop, which relies on the use of ontologies to annotate and organize data and on the concept of personal information application (PIA), which is associated with a user's task. The PIA designer is the tool that is provided for building a variety of PIAs consisting of views (e.g., text, list, table, graph), which are spatially arranged and display interrelated fragments of the overall personal information. The semantic organization of the data follows a layered architecture that models separately the personal information, the domain data, and the application data. The network of concepts that ensues from extensive annotation and explicit associations lends itself well to rich browsing capabilities and to the formulation of expressive database-like queries. These queries are also the basis for the interaction among views of the PIAs in the same desktop or in networked desktops. In the latter case, the concept of desktop service provides for a semantic platform for the integration of information across different desktops and the web. 
In this paper, we present in detail the semantic organization of the information, the overall system architecture and implementation aspects, queries and their processing, PIAs and the PIA designer, including usability studies on the designer, and the concepts of semantic navigation in a desktop and of interoperation in a network of desktops.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Candan:2008:SSE, author = "K. Sel{\c{c}}uk Candan and Huiping Cao and Yan Qi and Maria Luisa Sapino", title = "System support for exploration and expert feedback in resolving conflicts during integration of metadata", journal = j-VLDB-J, volume = "17", number = "6", pages = "1407--1444", month = nov, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0109-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 22 09:20:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A critical reality in integration is that knowledge obtained from different sources may often be conflicting. Conflict-resolution, whether performed during the design phase or during run-time, can be costly and, if done without a proper understanding of the usage context, can be ineffective. In this paper, we propose a novel exploration and feedback-based approach [FICSR (Pronounced as 'fixer')] to conflict-resolution when integrating metadata from different sources. Rather than relying on purely automated conflict-resolution mechanisms, FICSR brings the domain expert in the conflict-resolution process and informs the integration based on the expert's feedback. 
In particular, instead of relying on traditional model based definition of consistency (which, whenever there are conflicts, picks a possible world among many), we introduce a ranked interpretation of the metadata and statements about the metadata. This not only enables FICSR to avoid committing to an interpretation too early, but also helps in achieving a more direct correspondence between the experts' (subjective) interpretation of the data and the system's (objective) treatment of the available alternatives. Consequently, the ranked interpretation leads to new opportunities for exploratory feedback for conflict-resolution: within the context of a given statement of interest, (a) a preliminary ranking of candidate matches, representing different resolutions of the conflicts, informs the user about the alternative interpretations of the metadata, while (b) user feedback regarding the preferences among alternatives is exploited to inform the system about the expert's relevant domain knowledge. The expert's feedback, then, is used for resolving not only the conflicts among different sources, but also possible mis-alignments due to the initial matching phase. To enable this $ ({\rm system} \stackrel {\rm informs}{\longleftrightarrow } {\rm user}) $ feedback process, we develop data structures and algorithms for efficient off-line conflict/agreement analysis of the integrated metadata. We also develop algorithms for efficient on-line query processing, candidate result enumeration, validity analysis, and system feedback. 
The results are brought together and evaluated in the Feedback-based InConSistency Resolution (FICSR) system.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Exploration of alternatives; Feedback-based conflict-resolution; Metadata integration; System feedback; Taxonomy; User feedback", } @Article{Wang:2008:AXB, author = "Fusheng Wang and Carlo Zaniolo and Xin Zhou", title = "{ArchIS}: an {XML}-based approach to transaction-time temporal database systems", journal = j-VLDB-J, volume = "17", number = "6", pages = "1445--1463", month = nov, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0086-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 22 09:20:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Effective support for temporal applications by database systems represents an important technical objective that is difficult to achieve since it requires an integrated solution for several problems, including (i) expressive temporal representations and data models, (ii) powerful languages for temporal queries and snapshot queries, (iii) indexing, clustering and query optimization techniques for managing temporal information efficiently, and (iv) architectures that bring together the different pieces of enabling technology into a robust system. In this paper, we present the ArchIS system that achieves these objectives by supporting a temporally grouped data model on top of RDBMS. 
ArchIS' architecture uses (a) XML to support temporally grouped (virtual) representations of the database history, (b) XQuery to express powerful temporal queries on such views, (c) temporal clustering and indexing techniques for managing the actual historical data in a relational database, and (d) SQL/XML for executing the queries on the XML views as equivalent queries on the relational database. The performance studies presented in the paper show that ArchIS is quite effective at storing and retrieving under complex query conditions the transaction-time history of relational databases, and can also assure excellent storage efficiency by providing compression as an option. This approach achieves full-functionality transaction-time databases without requiring temporal extensions in XML or database standards, and provides critical support to emerging application areas such as RFID.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Temporal database; Temporal grouping; Temporal query; XML database; XQuery", } @Article{Zhou:2008:DSD, author = "Yongluan Zhou and Beng Chin Ooi and Kian-Lee Tan", title = "Disseminating streaming data in a dynamic environment: an adaptive and cost-based approach", journal = j-VLDB-J, volume = "17", number = "6", pages = "1465--1483", month = nov, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0077-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 22 09:20:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In a distributed stream processing system, streaming data are continuously disseminated from the sources to the distributed processing servers. 
To enhance the dissemination efficiency, these servers are typically organized into one or more dissemination trees. In this paper, we focus on the problem of constructing dissemination trees to minimize the average loss of fidelity of the system. We observe that existing heuristic-based approaches can only explore a limited solution space and hence may lead to sub-optimal solutions. On the contrary, we propose an adaptive and cost-based approach. Our cost model takes into account both the processing cost and the communication cost. Furthermore, as a distributed stream processing system is vulnerable to inaccurate statistics, runtime fluctuations of data characteristics, server workloads, and network conditions, we have designed our scheme to be adaptive to these situations: an operational dissemination tree may be incrementally transformed to a more cost-effective one. Our adaptive strategy employs distributed decisions made by the distributed servers independently based on localized statistics collected by each server at runtime. For a relatively static environment, we also propose two static tree construction algorithms relying on a priori system statistics. These static trees can also be used as initial trees in a dynamic environment. We apply our schemes to both single- and multi-object dissemination. 
Our extensive performance study shows that the adaptive mechanisms are effective in a dynamic context and the proposed static tree construction algorithms perform close to optimal in a static environment.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Dissemination trees; Distributed stream processing; Streaming data dissemination", } @Article{Kim:2008:SOF, author = "Min-Soo Kim and Kyu-Young Whang and Jae-Gil Lee and Min-Jae Lee", title = "Structural optimization of a full-text $n$-gram index using relational normalization", journal = j-VLDB-J, volume = "17", number = "6", pages = "1485--1507", month = nov, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0082-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 22 09:20:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "As the amount of text data grows explosively, an efficient index structure for large text databases becomes ever important. The $n$-gram inverted index (simply, the $n$-gram index) has been widely used in information retrieval or in approximate string matching due to its two major advantages: language-neutral and error-tolerant. Nevertheless, the $n$-gram index also has drawbacks: the size tends to be very large, and the performance of queries tends to be bad. In this paper, we propose the two-level $n$-gram inverted index (simply, the $n$-gram/2L index) that significantly reduces the size and improves the query performance by using the relational normalization theory. We first identify that, in the (full-text) $n$-gram index, there exists redundancy in the position information caused by a non-trivial multivalued dependency. 
The proposed index eliminates such redundancy by constructing the index in two levels: the front-end index and the back-end index. We formally prove that this two-level construction is identical to the relational normalization process. We call this process structural optimization of the $n$-gram index. The $n$-gram/2L index has excellent properties: (1) it significantly reduces the size and improves the performance compared with the $n$-gram index with these improvements becoming more marked as the database size gets larger; (2) the query processing time increases only very slightly as the query length gets longer. Experimental results using real databases of 1 GB show that the size of the $n$-gram/2L index is reduced by up to 1.9--2.4 times and, at the same time, the query performance is improved by up to 13.1 times compared with those of the $n$-gram index. We also compare the $n$-gram/2L index with M{\"a}kinen's compact suffix array (CSA) (Proc. 11th Annual Symposium on Combinatorial Pattern Matching pp. 305--319, 2000) stored in disk.
Experimental results show that the $n$-gram/2L index outperforms the CSA when the query length is short (i.e., less than 15--20), and the CSA is similar to or better than the $n$-gram/2L index when the query length is long (i.e., more than 15--20).", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "$n$-gram; Inverted index; Multivalued dependency; Text search", } @Article{Guha:2008:STO, author = "Sudipto Guha", title = "On the space--time of optimal, approximate and streaming algorithms for synopsis construction problems", journal = j-VLDB-J, volume = "17", number = "6", pages = "1509--1535", month = nov, year = "2008", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0083-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 22 09:20:08 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Synopses construction algorithms have been found to be of interest in query optimization, approximate query answering and mining, and over the last few years several good synopsis construction algorithms have been proposed. These algorithms have mostly focused on the running time of the synopsis construction vis-a-vis the synopsis quality. However the space complexity of synopsis construction algorithms has not been investigated as thoroughly. Many of the optimum synopsis construction algorithms are expensive in space. For some of these algorithms the space required to construct the synopsis is significantly larger than the space required to store the input. These algorithms rely on the fact that they require a smaller 'working space' and most of the data can be resident on disc. The large space complexity of synopsis construction algorithms is a handicap in several scenarios.
In the case of streaming algorithms, space is a fundamental constraint. In case of offline optimal or approximate algorithms, a better space complexity often makes these algorithms much more attractive by allowing them to run in main memory and not use disc, or alternately allows us to scale to significantly larger problems without running out of space. In this paper, we propose a simple and general technique that reduces space complexity of synopsis construction algorithms. As a consequence we show that the notion of 'working space' proposed in these contexts is redundant. This technique can be easily applied to many existing algorithms for synopsis construction problems. We demonstrate the performance benefits of our proposal through experiments on real-life and synthetic data. We believe that our algorithm also generalizes to a broader range of dynamic programs beyond synopsis construction.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lakhal:2009:FFE, author = "Neila Ben Lakhal and Takashi Kobayashi and Haruo Yokota", title = "{FENECIA}: failure endurable nested-transaction based execution of composite {Web} services with incorporated state analysis", journal = j-VLDB-J, volume = "18", number = "1", pages = "1--56", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0076-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Interest in the Web services (WS) composition (WSC) paradigm is increasing tremendously. A real shift in distributed computing history is expected to occur when the dream of implementing Service-Oriented Architecture (SOA) is realized. 
However, there is a long way to go to achieve such an ambitious goal. In this paper, we support the idea that, when challenging the WSC issue, the earlier that the inevitability of failures is recognized and proper failure-handling mechanisms are defined, from the very early stage of the composite WS (CWS) specification, the greater are the chances of achieving a significant gain in dependability. To formalize this vision, we present the FENECIA (Failure Endurable Nested-transaction based Execution of Composite Web services with Incorporated state Analysis) framework. Our framework approaches the WSC issue from different points of view to guarantee a high level of dependability. In particular, it aims at being simultaneously a failure-handling-devoted CWS specification, execution, and quality of service (QoS) assessment approach. In the first section of our framework, we focus on answering the need for a specification model tailored for the WS architecture. To this end, we introduce WS-SAGAS, a new transaction model. WS-SAGAS introduces key concepts that are not part of the WS architecture pillars, namely, arbitrary nesting, state, vitality degree, and compensation, to specify failure-endurable CWS as a hierarchy of recursively nested transactions. In addition, to define the CWS execution semantics, without suffering from the hindrance of an XML-based notation, we describe a textual notation that describes a WSC in terms of definition rules, composability rules, and ordering rules, and we introduce graphical and formal notations. These rules provide the solid foundation needed to formulate the execution semantics of a CWS in terms of execution correctness verification dependencies. To ensure dependable execution of the CWS, we present in the second section of FENECIA our architecture THROWS, in which the execution control of the resulting CWS is distributed among engines, discovered dynamically, that communicate in a peer-to-peer fashion. 
A dependable execution is guaranteed in THROWS by keeping track of the execution progress of a CWS and by enforcing forward and backward recovery. We concentrate in the third section of our approach on showing how the failure consideration is trivial in acquiring more accurate CWS QoS estimations. We propose a model that assesses several QoS properties of CWS, which are specified as WS-SAGAS transactions and executed in THROWS. We validate our proposal and show its feasibility and broad applicability by describing an implemented prototype and a case study.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Composition; Dependability; Distributed execution; Failure; QoS; Transaction model; Web services", } @Article{Sharifzadeh:2009:AVC, author = "Mehdi Sharifzadeh and Cyrus Shahabi", title = "Approximate {Voronoi} cell computation on spatial data streams", journal = j-VLDB-J, volume = "18", number = "1", pages = "57--75", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0081-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Several studies have exploited the properties of Voronoi diagrams to improve the efficiency of variations of the nearest neighbor search on stored datasets. However, the significance of Voronoi diagrams and their basic building blocks, Voronoi cells, has been neglected when the geometry data is incrementally becoming available as a data stream. In this paper, we study the problem of Voronoi cell computation for fixed 2-d site points when the locations of the neighboring sites arrive as a spatial data stream. 
We show that the non-streaming solution to the problem does not meet the memory requirements of many realistic scenarios over a sliding window. Hence, we propose AVC-SW, an approximate streaming algorithm that computes $ (1 + \epsilon)$-approximations to the actual exact Voronoi cell in $ O(\kappa)$ where $ \kappa $ is its sample size. With the sliding window model and random arrival of points, we show both analytically and experimentally that for given window size $w$ and parameter $k$, AVC-SW reduces the expected memory requirements of the classic algorithm from $ O(w)$ to $ O(k \log (\frac {w}{k} + 1))$ regardless of the distribution of the points in the 2-d space. This is a significant improvement for most of the real-world scenarios where $ w \gg k$.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Approximation; Sliding window; Spatial data stream; Voronoi cell", } @Article{Vlachos:2009:OPV, author = "Michail Vlachos and Aris Anagnostopoulos and Olivier Verscheure and Philip S. Yu", title = "Online pairing of {VoIP} conversations", journal = j-VLDB-J, volume = "18", number = "1", pages = "77--98", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0087-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper answers the following question; given a multiplicity of evolving 1-way conversations, can a machine or an algorithm discern the conversational pairs in an online fashion, without understanding the content of the communications? Our analysis indicates that this is possible, and can be achieved just by exploiting the temporal dynamics inherent in a conversation. 
We also show that our findings are applicable for anonymous and encrypted conversations over VoIP networks. We achieve this by exploiting the aperiodic inter-departure time of VoIP packets, hence trivializing each VoIP stream into a binary time-series, indicating the voice activity of each stream. We propose effective techniques that progressively pair conversing parties with high accuracy and in a limited amount of time. Our findings are verified empirically on a dataset consisting of 1,000 conversations. We obtain very high pairing accuracy that reaches 97\% after 5 min of voice conversations. Using a modeling approach we also demonstrate analytically that our result can be extended over an unlimited number of conversations.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Binary time-series clustering; Conversation pairing; Stream clustering; Voice-over-IP", } @Article{Yao:2009:LMK, author = "Yuxia Yao and Xueyan Tang and Ee-Peng Lim", title = "Localized monitoring of {kNN} queries in wireless sensor networks", journal = j-VLDB-J, volume = "18", number = "1", pages = "99--117", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0089-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Wireless sensor networks have been widely used in civilian and military applications. Primarily designed for monitoring purposes, many sensor applications require continuous collection and processing of sensed data. Due to the limited power supply for sensor nodes, energy efficiency is a major performance concern in query processing. In this paper, we focus on continuous $k$NN query processing in object tracking sensor networks.
We propose a localized scheme to monitor nearest neighbors to a query point. The key idea is to establish a monitoring area for each query so that only the updates relevant to the query are collected. The monitoring area is set up when the $k$NN query is initially evaluated and is expanded and shrunk on the fly upon object movement. We analyze the optimal maintenance of the monitoring area and develop an adaptive algorithm to dynamically decide when to shrink the monitoring area. Experimental results show that establishing a monitoring area for continuous $k$NN query processing greatly reduces energy consumption and prolongs network lifetime.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Paton:2009:AQP, author = "Norman W. Paton and Jorge Buenabad-Chavez and Mengsong Chen and Vijayshankar Raman and Garret Swart and Inderpal Narang and Daniel M. Yellin and Alvaro A. Fernandes", title = "Autonomic query parallelization using non-dedicated computers: an evaluation of adaptivity options", journal = j-VLDB-J, volume = "18", number = "1", pages = "119--140", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-007-0090-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Writing parallel programs that can take advantage of non-dedicated processors is much more difficult than writing such programs for networks of dedicated processors. In a non-dedicated environment such programs must use autonomic techniques to respond to the unpredictable load fluctuations that prevail in the computational environment.
In adaptive query processing (AQP), several techniques have been proposed for dynamically redistributing processor load assignments throughout a computation to take account of varying resource capabilities, but we know of no previous study that compares their performance. This paper presents a simulation-based evaluation of these autonomic parallelization techniques in a uniform environment and compares how well they improve the performance of the computation. Four published strategies are compared with a new algorithm that seeks to overcome some weaknesses identified in the existing approaches. In addition, we explore the use of techniques from online algorithms to provide a firm foundation for determining when to adapt in two of the existing algorithms. The evaluations identify situations in which each strategy may be used effectively and in which it should be avoided.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Park:2009:ESR, author = "Laurence A. Park and Kotagiri Ramamohanarao", title = "Efficient storage and retrieval of probabilistic latent semantic information for information retrieval", journal = j-VLDB-J, volume = "18", number = "1", pages = "141--155", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0093-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Probabilistic latent semantic analysis (PLSA) is a method for computing term and document relationships from a document set. The probabilistic latent semantic index (PLSI) has been used to store PLSA information, but unfortunately the PLSI uses excessive storage space relative to a simple term frequency index, which causes lengthy query times. 
To overcome the storage and speed problems of PLSI, we introduce the probabilistic latent semantic thesaurus (PLST); an efficient and effective method of storing the PLSA information. We show that through methods such as document thresholding and term pruning, we are able to maintain the high precision results found using PLSA while using a very small percent (0.15\%) of the storage space of PLSI.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Probabilistic latent semantic analysis; Query expansion; Thesaurus", } @Article{Askitis:2009:BTD, author = "Nikolas Askitis and Justin Zobel", title = "{B}-tries for disk-based string management", journal = j-VLDB-J, volume = "18", number = "1", pages = "157--179", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0094-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A wide range of applications require that large quantities of data be maintained in sort order on disk. The B-tree, and its variants, are an efficient general-purpose disk-based data structure that is almost universally used for this task. The B-trie has the potential to be a competitive alternative for the storage of data where strings are used as keys, but has not previously been thoroughly described or tested. We propose new algorithms for the insertion, deletion, and equality search of variable-length strings in a disk-resident B-trie, as well as novel splitting strategies which are a critical element of a practical implementation. We experimentally compare the B-trie against variants of B-tree on several large sets of strings with a range of characteristics. 
Our results demonstrate that, although the B-trie uses more memory, it is faster, more scalable, and requires less disk space.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "B-tree; Burst trie; Data structures; Secondary storage; Vocabulary accumulation; Word-level indexing", } @Article{Joshi:2009:SBE, author = "Shantanu Joshi and Christopher Jermaine", title = "Sampling-based estimators for subset-based queries", journal = j-VLDB-J, volume = "18", number = "1", pages = "181--202", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0095-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We consider the problem of using sampling to estimate the result of an aggregation operation over a subset-based SQL query, where a subquery is correlated to an outer query by a NOT EXISTS, NOT IN, EXISTS or IN clause. We design an unbiased estimator for our query and prove that it is indeed unbiased. We then provide a second, biased estimator that makes use of the superpopulation concept from statistics to minimize the mean squared error of the resulting estimate. 
The two estimators are tested over an extensive set of experiments.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Aggregate query processing; Approximate query processing; Sampling", } @Article{Sacharidis:2009:HCW, author = "Dimitris Sacharidis and Antonios Deligiannakis and Timos Sellis", title = "Hierarchically compressed wavelet synopses", journal = j-VLDB-J, volume = "18", number = "1", pages = "203--231", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0096-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The wavelet decomposition is a proven tool for constructing concise synopses of large data sets that can be used to obtain fast approximate answers. Existing research studies focus on selecting an optimal set of wavelet coefficients to store so as to minimize some error metric, without however seeking to reduce the size of the wavelet coefficients themselves. In many real data sets the existence of large spikes in the data values results in many large coefficient values lying on paths of a conceptual tree structure known as the error tree. To exploit this fact, we introduce in this paper a novel compression scheme for wavelet synopses, termed hierarchically compressed wavelet synopses, that fully exploits hierarchical relationships among coefficients in order to reduce their storage. Our proposed compression scheme allows for a larger number of coefficients to be stored for a given space constraint thus resulting in increased accuracy of the produced synopsis. 
We propose optimal, approximate and greedy algorithms for constructing hierarchically compressed wavelet synopses that minimize the sum squared error while not exceeding a given space budget. Extensive experimental results on both synthetic and real-world data sets validate our novel compression scheme and demonstrate the effectiveness of our algorithms against existing synopsis construction algorithms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Compression; Data streams; Wavelet synopsis", } @Article{Theodoratos:2009:CPS, author = "Dimitri Theodoratos and Pawel Placek and Theodore Dalamagas and Stefanos Souldatos and Timos Sellis", title = "Containment of partially specified tree-pattern queries in the presence of dimension graphs", journal = j-VLDB-J, volume = "18", number = "1", pages = "233--254", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0097-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Nowadays, huge volumes of data are organized or exported in tree-structured form. Querying capabilities are provided through tree-pattern queries. The need for querying tree-structured data sources when their structure is not fully known, and the need to integrate multiple data sources with different tree structures have driven, recently, the suggestion of query languages that relax the complete specification of a tree pattern. In this paper, we consider a query language that allows the partial specification of a tree pattern. Queries in this language range from structureless keyword-based queries to completely specified tree patterns. 
To support the evaluation of partially specified queries, we use semantically rich constructs, called dimension graphs, which abstract structural information of the tree-structured data. We address the problem of query containment in the presence of dimension graphs and we provide necessary and sufficient conditions for query containment. As checking query containment can be expensive, we suggest two heuristic approaches for query containment in the presence of dimension graphs. Our approaches are based on extracting structural information from the dimension graph that can be added to the queries while preserving equivalence with respect to the dimension graph. We considered both cases: extracting and storing different types of structural information in advance, and extracting information on-the-fly (at query time). Both approaches are implemented, validated, and compared through experimental evaluation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Partial tree-pattern query; Query containment; Tree-structured data; XML", } @Article{Benjelloun:2009:SGA, author = "Omar Benjelloun and Hector Garcia-Molina and David Menestrina and Qi Su and Steven Euijong Whang and Jennifer Widom", title = "{Swoosh}: a generic approach to entity resolution", journal = j-VLDB-J, volume = "18", number = "1", pages = "255--276", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0098-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We consider the entity resolution (ER) problem (also known as deduplication, or merge--purge), in which records determined to represent the same real-world entity are successively located and merged.
We formalize the generic ER problem, treating the functions for comparing and merging records as black-boxes, which permits expressive and extensible ER solutions. We identify four important properties that, if satisfied by the match and merge functions, enable much more efficient ER algorithms. We develop three efficient ER algorithms: G-Swoosh for the case where the four properties do not hold, and R-Swoosh and F-Swoosh that exploit the four properties. F-Swoosh in addition assumes knowledge of the 'features' (e.g., attributes) used by the match function. We experimentally evaluate the algorithms using comparison shopping data from Yahoo! Shopping and hotel information data from Yahoo! Travel. We also show that R-Swoosh (and F-Swoosh) can be used even when the four match and merge properties do not hold, if an 'approximate' result is acceptable.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Data cleaning; Entity resolution; Generic entity resolution", } @Article{Ratprasartporn:2009:CBL, author = "Nattakarn Ratprasartporn and Jonathan Po and Ali Cakmak and Sulieman Bani-Ahmad and Gultekin Ozsoyoglu", title = "Context-based literature digital collection search", journal = j-VLDB-J, volume = "18", number = "1", pages = "277--301", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0099-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We identify two issues with searching literature digital collections within digital libraries: (a) there are no effective paper-scoring and ranking mechanisms. 
Without a scoring and ranking system, users are often forced to scan a large and diverse set of publications listed as search results and potentially miss the important ones. (b) Topic diffusion is a common problem: publications returned by a keyword-based search query often fall into multiple topic areas, not all of which are of interest to users. This paper proposes a new literature digital collection search paradigm that effectively ranks search outputs, while controlling the diversity of keyword-based search query output topics. Our approach is as follows. First, during pre-querying, publications are assigned into pre-specified ontology-based contexts, and query-independent context scores are attached to papers with respect to the assigned contexts. When a query is posed, relevant contexts are selected, search is performed within the selected contexts, context scores of publications are revised into relevancy scores with respect to the query at hand and the context that they are in, and query outputs are ranked within each relevant context. This way, we (1) minimize query output topic diversity, (2) reduce query output size, (3) decrease user time spent scanning query results, and (4) increase query output ranking accuracy. Using genomics-oriented PubMed publications as the testbed and Gene Ontology terms as contexts, our experiments indicate that the proposed context-based search approach produces search results with up to 50\% higher precision, and reduces the query output size by up to 70\%.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Context score; Context-based search; Digital collections; Ontology; Ranking", } @Article{Chiu:2009:EFS, author = "Ding-Ying Chiu and Yi-Hung Wu and Arbee L. 
Chen", title = "Efficient frequent sequence mining by a dynamic strategy switching algorithm", journal = j-VLDB-J, volume = "18", number = "1", pages = "303--327", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0100-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Mining frequent sequences in large databases has been an important research topic. The main challenge of mining frequent sequences is the high processing cost due to the large amount of data. In this paper, we propose a novel strategy to find all the frequent sequences without having to compute the support counts of non-frequent sequences. The previous works prune candidate sequences based on the frequent sequences with shorter lengths, while our strategy prunes candidate sequences according to the non-frequent sequences with the same lengths. As a result, our strategy can cooperate with the previous works to achieve a better performance. We then identify three major strategies used in the previous works and combine them with our strategy into an efficient algorithm. The novelty of our algorithm lies in its ability to dynamically switch from a previous strategy to our new strategy in the mining process for a better performance. 
Experiment results show that our algorithm outperforms the previous ones under various parameter settings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Data mining; Frequent sequence; Sequence comparison; Strategy switching", } @Article{Shen:2009:SII, author = "Heng Tao Shen and Shouxu Jiang and Kian-Lee Tan and Zi Huang and Xiaofang Zhou", title = "Speed up interactive image retrieval", journal = j-VLDB-J, volume = "18", number = "1", pages = "329--343", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0101-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In multimedia retrieval, a query is typically interactively refined towards the 'optimal' answers by exploiting user feedback. However, in existing work, in each iteration, the refined query is re-evaluated. This is not only inefficient but fails to exploit the answers that may be common between iterations. Furthermore, it may also take too many iterations to get the 'optimal' answers. In this paper, we introduce a new approach called OptRFS (optimizing relevance feedback search by query prediction) for iterative relevance feedback search. OptRFS aims to take users to view the 'optimal' results as fast as possible. It optimizes relevance feedback search by both shortening the searching time during each iteration and reducing the number of iterations. OptRFS predicts the potential candidates for the next iteration and maintains this small set for efficient sequential scan. By doing so, repeated candidate accesses (i.e., random accesses) can be saved, hence reducing the searching time for the next iteration. 
In addition, efficient scan on the overlap before the next search starts also tightens the search space with smaller pruning radius. As a step forward, OptRFS also predicts the 'optimal' query, which corresponds to 'optimal' answers, based on the early executed iterations' queries. By doing so, some intermediate iterations can be saved, hence reducing the total number of iterations. By taking the correlations among the early executed iterations into consideration, OptRFS investigates linear regression, exponential smoothing and linear exponential smoothing to predict the next refined query so as to decide the overlap of candidates between two consecutive iterations. Considering the special features of relevance feedback, OptRFS further introduces adaptive linear exponential smoothing to self-adjust the parameters for more accurate prediction. We implemented OptRFS and our experimental study on real life data sets show that it can reduce the total cost of relevance feedback search significantly. Some interesting features of relevance feedback search are also discovered and discussed.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Image retrieval; Indexing; Query processing; Relevance feedback", } @Article{Wang:2009:SFS, author = "Shiyuan Wang and Quang Hieu Vu and Beng Chin Ooi and Anthony K. 
Tung and Lizhen Xu", title = "{Skyframe}: a framework for skyline query processing in peer-to-peer systems", journal = j-VLDB-J, volume = "18", number = "1", pages = "345--362", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0104-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper looks at the processing of skyline queries on peer-to-peer (P2P) networks. We propose Skyframe, a framework for efficient skyline query processing in P2P systems, which addresses the challenges of quick response time, low network communication cost and query load balancing among peers. Skyframe consists of two querying methods: one is optimized for network communication while the other focuses on query response time. These methods are different in the way in which the query search space is defined. In particular, the first method uses a high dominating point that has a large dominating region to prune the search space to achieve a low cost in network communication. On the other hand, the second method relaxes the search space in order to allow parallel query processing to speed up query response. Skyframe achieves query load balancing by both query load conscious data space splitting/merging during the join/departure of nodes and dynamic load migration. We further show how to apply Skyframe to both the P2P systems supporting multi-dimensional indexing and the P2P systems supporting single-dimensional indexing. Finally, we have conducted extensive experiments on both real and synthetic data sets over two existing P2P systems: CAN (Ratnasamy in A scalable content-addressable network. In: Proceedings of SIGCOMM Conference, pp. 161--172, 2001) and BATON (Jagadish et al. in A balanced tree structure for peer-to-peer networks. In: Proceedings of VLDB Conference, pp.
661--672, 2005) to evaluate the effectiveness and scalability of Skyframe.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Framework; Load balancing; Optimization; Peer-to-peer systems; Skyline query processing", } @Article{Mouratidis:2009:PMD, author = "Kyriakos Mouratidis and Dimitris Sacharidis and Hweehwa Pang", title = "Partially materialized digest scheme: an efficient verification method for outsourced databases", journal = j-VLDB-J, volume = "18", number = "1", pages = "363--381", month = jan, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0108-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 15:49:59 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In the outsourced database model, a data owner publishes her database through a third-party server; i.e., the server hosts the data and answers user queries on behalf of the owner. Since the server may not be trusted, or may be compromised, users need a means to verify that answers received are both authentic and complete, i.e., that the returned data have not been tampered with, and that no qualifying results have been omitted. We propose a result verification approach for one-dimensional queries, called Partially Materialized Digest scheme (PMD), that applies to both static and dynamic databases. PMD uses separate indexes for the data and for their associated verification information, and only partially materializes the latter. In contrast with previous work, PMD avoids unnecessary costs when processing queries that do not request verification, achieving the performance of an ordinary index (e.g., a B$^+$-tree).
On the other hand, when an authenticity and completeness proof is required, PMD outperforms the existing state-of-the-art technique by a wide margin, as we demonstrate analytically and experimentally. Furthermore, we design two verification methods for spatial queries. The first, termed Merkle R-tree (MR-tree), extends the conventional approach of embedding authentication information into the data index (i.e., an R-tree). The second, called Partially Materialized KD-tree (PMKD), follows the PMD paradigm using separate data and verification indexes. An empirical evaluation with real data shows that the PMD methodology is superior to the traditional approach for spatial queries too.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Authentication in outsourced databases; Query result verification", } @Article{Garofalakis:2009:SIB, author = "Minos Garofalakis and Johannes Gehrke and Divesh Srivastava", title = "Special issue: best papers of {VLDB 2007}", journal = j-VLDB-J, volume = "18", number = "2", pages = "383--384", month = apr, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-009-0132-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 13 09:15:13 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Abadi:2009:SSV, author = "Daniel J. Abadi and Adam Marcus and Samuel R. 
Madden and Kate Hollenbach", title = "{SW-Store}: a vertically partitioned {DBMS} for {Semantic Web} data management", journal = j-VLDB-J, volume = "18", number = "2", pages = "385--406", month = apr, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0125-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 13 09:15:13 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Efficient management of RDF data is an important prerequisite for realizing the Semantic Web vision. Performance and scalability issues are becoming increasingly pressing as Semantic Web technology is applied to real-world applications. In this paper, we examine the reasons why current data management solutions for RDF data scale poorly, and explore the fundamental scalability limitations of these approaches. We review the state of the art for improving performance of RDF databases and consider a recent suggestion, 'property tables'. We then discuss practically and empirically why this solution has undesirable features. As an improvement, we propose an alternative solution: vertically partitioning the RDF data. We compare the performance of vertical partitioning with prior art on queries generated by a Web-based RDF browser over a large-scale (more than 50 million triples) catalog of library data. Our results show that a vertically partitioned schema achieves similar performance to the property table technique while being much simpler to design. Further, if a column-oriented DBMS (a database architected specially for the vertically partitioned case) is used instead of a row-oriented DBMS, another order of magnitude performance improvement is observed, with query times dropping from minutes to several seconds. 
Encouraged by these results, we describe the architecture of SW-Store, a new DBMS we are actively building that implements these techniques to achieve high performance RDF data management.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Arai:2009:AMT, author = "Benjamin Arai and Gautam Das and Dimitrios Gunopulos and Nick Koudas", title = "Anytime measures for top-$k$ algorithms on exact and fuzzy data sets", journal = j-VLDB-J, volume = "18", number = "2", pages = "407--427", month = apr, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0127-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 13 09:15:13 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Top-$k$ queries on large multi-attribute data sets are fundamental operations in information retrieval and ranking applications. In this article, we initiate research on the anytime behavior of top-$k$ algorithms on exact and fuzzy data. In particular, given specific top-$k$ algorithms (TA and TA-Sorted) we are interested in studying their progress toward identification of the correct result at any point during the algorithms' execution. We adopt a probabilistic approach where we seek to report at any point of operation of the algorithm the confidence that the top-$k$ result has been identified. Such a functionality can be a valuable asset when one is interested in reducing the runtime cost of top-$k$ computations.
We present a thorough experimental evaluation to validate our techniques using both synthetic and real data sets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Anytime; Approximate query; Fuzzy data; Top-k", } @Article{Chen:2009:AKD, author = "Bee-Chung Chen and Kristen Lefevre and Raghu Ramakrishnan", title = "Adversarial-knowledge dimensions in data privacy", journal = j-VLDB-J, volume = "18", number = "2", pages = "429--467", month = apr, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0118-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 13 09:15:13 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Privacy is an important issue in data publishing. Many organizations distribute non-aggregate personal data for research, and they must take steps to ensure that an adversary cannot predict sensitive information pertaining to individuals with high confidence. This problem is further complicated by the fact that, in addition to the published data, the adversary may also have access to other resources (e.g., public records and social networks relating individuals), which we call adversarial knowledge. A robust privacy framework should allow publishing organizations to analyze data privacy by means of not only data dimensions (data that a publishing organization has), but also adversarial-knowledge dimensions (information not in the data). In this paper, we first describe a general framework for reasoning about privacy in the presence of adversarial knowledge. Within this framework, we propose a novel multidimensional approach to quantifying adversarial knowledge. 
This approach allows the publishing organization to investigate privacy threats and enforce privacy requirements in the presence of various types and amounts of adversarial knowledge. Our main technical contributions include a multidimensional privacy criterion that is more intuitive and flexible than previous approaches to modeling background knowledge. In addition, we identify an important congregation property of the adversarial-knowledge dimensions. Based on this property, we provide algorithms for measuring disclosure and sanitizing data that improve computational efficiency several orders of magnitude over the best known techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Anonymization; Knowledge expression; Privacy-preserving data publishing; Probabilistic inference; Skyline; Worst-case privacy", } @Article{Dong:2009:DIU, author = "Xin Luna Dong and Alon Halevy and Cong Yu", title = "Data integration with uncertainty", journal = j-VLDB-J, volume = "18", number = "2", pages = "469--500", month = apr, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0119-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 13 09:15:13 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper reports our first set of results on managing uncertainty in data integration. We posit that data-integration systems need to handle uncertainty at three levels and do so in a principled fashion. First, the semantic mappings between the data sources and the mediated schema may be approximate because there may be too many of them to be created and maintained or because in some domains (e.g., bioinformatics) it is not clear what the mappings should be. 
Second, the data from the sources may be extracted using information extraction techniques and so may yield erroneous data. Third, queries to the system may be posed with keywords rather than in a structured form. As a first step to building such a system, we introduce the concept of probabilistic schema mappings and analyze their formal foundations. We show that there are two possible semantics for such mappings: by-table semantics assumes that there exists a correct mapping but we do not know what it is; by-tuple semantics assumes that the correct mapping may depend on the particular tuple in the source data. We present the query complexity and algorithms for answering queries in the presence of probabilistic schema mappings, and we describe an algorithm for efficiently computing the top-$k$ answers to queries in such a setting. Finally, we consider using probabilistic mappings in the scenario of data exchange.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Data exchange; Data integration; Probabilistic schema mapping", } @Article{Gedik:2009:CPS, author = "Bu{\u{g}}ra Gedik and Rajesh R. Bordawekar and Philip S. Yu", title = "{CellJoin}: a parallel stream join operator for the {Cell} processor", journal = j-VLDB-J, volume = "18", number = "2", pages = "501--519", month = apr, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0116-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 13 09:15:13 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Low-latency and high-throughput processing are key requirements of data stream management systems (DSMSs). Hence, multi-core processors that provide high aggregate processing capacity are ideal matches for executing costly DSMS operators.
The recently developed Cell processor is a good example of a heterogeneous multi-core architecture and provides a powerful platform for executing data stream operators with high-performance. On the down side, exploiting the full potential of a multi-core processor like Cell is often challenging, mainly due to the heterogeneous nature of the processing elements, the software managed local memory at the co-processor side, and the unconventional programming model in general. In this paper, we study the problem of scalable execution of windowed stream join operators on multi-core processors, and specifically on the Cell processor. By examining various aspects of join execution flow, we determine the right set of techniques to apply in order to minimize the sequential segments and maximize parallelism. Concretely, we show that basic windows coupled with low-overhead pointer-shifting techniques can be used to achieve efficient join window partitioning, column-oriented join window organization can be used to minimize scattered data transfers, delay-optimized double buffering can be used for effective pipelining, rate-aware batching can be used to balance join throughput and tuple delay, and finally single-instruction multiple-data (SIMD) optimized operator code can be used to exploit data parallelism. 
Our experimental results show that, following the design guidelines and implementation techniques outlined in this paper, windowed stream joins can achieve high scalability (linear in the number of co-processors) by making efficient use of the extensive hardware parallelism provided by the Cell processor (reaching data processing rates of $ \approx $ 13 GB/s) and significantly surpass the performance obtained from conventional high-end processors (supporting a combined input stream rate of 2,000 tuples/s using 15 min windows and without dropping any tuples, resulting in $ \approx $ 8.3 times higher output rate compared to an SSE implementation on dual 3.2 GHz Intel Xeon).", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Schnaitter:2009:DER, author = "Karl Schnaitter and Joshua Spiegel and Neoklis Polyzotis", title = "Depth estimation for ranking query optimization", journal = j-VLDB-J, volume = "18", number = "2", pages = "521--542", month = apr, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0124-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 13 09:15:13 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A relational ranking query uses a scoring function to limit the results of a conventional query to a small number of the most relevant answers. The increasing popularity of this query paradigm has led to the introduction of specialized rank join operators that integrate the selection of top tuples with join processing. These operators access just 'enough' of the input in order to generate just 'enough' output and can offer significant speed-ups for query evaluation.
The number of input tuples that an operator accesses is called the input depth of the operator, and this is the driving cost factor in rank join processing. This introduces the important problem of depth estimation, which is crucial for the costing of rank join operators during query compilation and thus for their integration in optimized physical plans. We introduce an estimation methodology, termed deep, for approximating the input depths of rank join operators in a physical execution plan. At the core of deep lies a general, principled framework that formalizes depth computation in terms of the joint distribution of scores in the base tables. This framework results in a systematic estimation methodology that takes the characteristics of the data directly into account and thus enables more accurate estimates. We develop novel estimation algorithms that provide an efficient realization of the formal deep framework, and describe their integration on top of the statistics module of an existing query optimizer. We validate the performance of deep with an extensive experimental study on data sets of varying characteristics. 
The results verify the effectiveness of deep as an estimation method and demonstrate its advantages over previously proposed techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Data statistics; DEEP; Depth estimation; Query optimization; Relational ranking query; Top-k", } @Article{Shao:2009:EKS, author = "Feng Shao and Lin Guo and Chavdar Botev and Anand Bhaskar and Muthiah Chettiar and Fan Yang and Jayavel Shanmugasundaram", title = "Efficient keyword search over virtual {XML} views", journal = j-VLDB-J, volume = "18", number = "2", pages = "543--570", month = apr, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0126-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 13 09:15:13 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Emerging applications such as personalized portals, enterprise search, and web integration systems often require keyword search over semi-structured views. However, traditional information retrieval techniques are likely to be expensive in this context because they rely on the assumption that the set of documents being searched is materialized. In this paper, we present a system architecture and algorithm that can efficiently evaluate keyword search queries over virtual (unmaterialized) XML views. An interesting aspect of our approach is that it exploits indices present on the base data and thereby avoids materializing large parts of the view that are not relevant to the query results. Another feature of the algorithm is that by solely using indices, we can still score the results of queries over the virtual view, and the resulting scores are the same as if the view was materialized. 
Our performance evaluation using the INEX data set in the Quark (Bhaskar et al. in Quark: an efficient XQuery full-text implementation. In: SIGMOD, 2006) open-source XML database system indicates that the proposed approach is scalable and efficient.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Document projections; Document pruning; Keyword search; Top-K; XML views", } @Article{Wu:2009:GEV, author = "Mingxi Wu and Chris Jermaine", title = "Guessing the extreme values in a data set: a {Bayesian} method and its applications", journal = j-VLDB-J, volume = "18", number = "2", pages = "571--597", month = apr, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-009-0133-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 13 09:15:13 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "For a large number of data management problems, it would be very useful to be able to obtain a few samples from a data set, and to use the samples to guess the largest (or smallest) value in the entire data set. Min/max online aggregation, Top-k query processing, outlier detection, and distance join are just a few possible applications. This paper details a statistically rigorous, Bayesian approach to attacking this problem. 
Just as importantly, we demonstrate the utility of our approach by showing how it can be applied to four specific problems that arise in the context of data management.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Bayesian; Extreme values; Monte Carlo; Online aggregation; Sampling", } @Article{Hill:2009:ROJ, author = "Gerhard Hill and Andrew Ross", title = "Reducing outer joins", journal = j-VLDB-J, volume = "18", number = "3", pages = "599--610", month = jun, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0110-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:55:19 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We present a method for transforming some outer joins to inner joins and describe a generalized semijoin reduction technique. The first part of the paper shows how to transform a given outer join query whose join graph is a tree to an equivalent inner join query. The method uses derived relations and join predicates. Derived relations contain columns corresponding to join conditions and may have virtual row identifiers, rows and attribute values. The constructed inner join query, after elimination of virtual row identifiers, has the same join tuples as the outer join query. Both the theoretical maximum number of virtual rows and the average number in practice are shown to be low. The method confines consideration of the non-associativity of outer joins to a single step. The second part of the paper generalizes to outer joins the well known technique of semijoin reduction of inner joins. It does so by defining the notions of influencing and needing, and using them to define full reduction and reduction plans. 
The technique is applied here to perform one step of the method presented in the first part. Semijoin reduction is useful in practice for executing join queries in distributed databases.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Efficient join evaluation; Join transformation; Outer join evaluation; Semijoin reduction; Virtual row method", } @Article{Keogh:2009:SEI, author = "Eamonn Keogh and Li Wei and Xiaopeng Xi and Michail Vlachos and Sang-Hee Lee and Pavlos Protopapas", title = "Supporting exact indexing of arbitrarily rotated shapes and periodic time series under {Euclidean} and warping distance measures", journal = j-VLDB-J, volume = "18", number = "3", pages = "611--630", month = jun, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0111-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:55:19 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Shape matching and indexing is an important topic in its own right, and is a fundamental subroutine in most shape data mining algorithms. Given the ubiquity of shape, shape matching is an important problem with applications in domains as diverse as biometrics, industry, medicine, zoology and anthropology. The distance/similarity measure used for shape matching must be invariant to many distortions, including scale, offset, noise, articulation, partial occlusion, etc. Most of these distortions are relatively easy to handle, either in the representation of the data or in the similarity measure used. However, rotation invariance is noted in the literature as being an especially difficult challenge.
Current approaches typically try to achieve rotation invariance in the representation of the data, at the expense of discrimination ability, or in the distance measure, at the expense of efficiency. In this work, we show that we can take the slow but accurate approaches and dramatically speed them up. On real world problems our technique can take current approaches and make them four orders of magnitude faster without false dismissals. Moreover, our technique can be used with any of the dozens of existing shape representations and with all the most popular distance measures including Euclidean distance, dynamic time warping and Longest Common Subsequence. We further show that our indexing technique can be used to index star light curves, an important type of astronomical data, without modification.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Dynamic time warping; Indexing; Shape", } @Article{Yang:2009:AIO, author = "Yin Yang and Stavros Papadopoulos and Dimitris Papadias and George Kollios", title = "Authenticated indexing for outsourced spatial databases", journal = j-VLDB-J, volume = "18", number = "3", pages = "631--648", month = jun, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0113-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:55:19 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In spatial database outsourcing, a data owner delegates its data management tasks to a location-based service (LBS), which indexes the data with an authenticated data structure (ADS). The LBS receives queries (ranges, nearest neighbors) originating from several clients/subscribers. Each query initiates the computation of a verification object (VO) based on the ADS. 
The VO is returned to the client that can verify the result correctness using the public key of the owner. Our first contribution is the MR-tree, a space-efficient ADS that supports fast query processing and verification. Our second contribution is the MR*-tree, a modified version of the MR-tree, which significantly reduces the VO size through a novel embedding technique. Finally, whereas most ADSs must be constructed and maintained by the owner, we outsource the MR- and MR*-tree construction and maintenance to the LBS, thus relieving the owner from this computationally intensive task.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Authenticated index; Database outsourcing; Mobile computing; Spatial database", } @Article{Quiane-Ruiz:2009:SAQ, author = "Jorge-Arnulfo Quian{\'e}-Ruiz and Philippe Lamarre and Patrick Valduriez", title = "A self-adaptable query allocation framework for distributed information systems", journal = j-VLDB-J, volume = "18", number = "3", pages = "649--674", month = jun, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0114-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:55:19 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In large-scale distributed information systems, where participants are autonomous and have special interests for some queries, query allocation is a challenge. Much work in this context has focused on distributing queries among providers in a way that maximizes overall performance (typically throughput and response time). However, preserving the participants' interests is also important. In this paper, we make the following contributions. 
First, we provide a model to define the participants' perception of the system regarding their interests and propose measures to evaluate the quality of query allocation methods. Then, we propose a framework for query allocation called Satisfaction-based Query Load Balancing (SQLB, for short), which dynamically trades consumers' interests for providers' interests based on their satisfaction. Finally, we compare SQLB, through experimentation, with two important baseline query allocation methods, namely Capacity based and Mariposa-like. The results demonstrate that SQLB yields high efficiency while satisfying the participants' interests and significantly outperforms the baseline methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Distributed information systems; Query allocation; Queryload balancing; Satisfaction", } @Article{Deng:2009:IOQ, author = "Ke Deng and Xiaofang Zhou and Heng Tao Shen and Shazia Sadiq and Xue Li", title = "Instance optimal query processing in spatial networks", journal = j-VLDB-J, volume = "18", number = "3", pages = "675--693", month = jun, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0115-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:55:19 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The performance optimization of query processing in spatial networks focuses on minimizing network data accesses and the cost of network distance calculations. This paper proposes algorithms for network k -NN queries, range queries, closest-pair queries and multi-source skyline queries based on a novel processing framework, namely, incremental lower bound constraint. 
By giving high processing priority to the query associated data points and utilizing the incremental nature of the lower bound, the performance of our algorithms is better optimized in contrast to the corresponding algorithms based on known framework incremental Euclidean restriction and incremental network expansion. More importantly, the proposed algorithms are proven to be instance optimal among classes of algorithms. Through experiments on real road network datasets, the superiority of the proposed algorithms is demonstrated.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Incremental lower bound constraint; Instance optimality; Spatial networks; Spatial queries", } @Article{Yiu:2009:MDT, author = "Man Lung Yiu and Nikos Mamoulis", title = "Multi-dimensional top-$k$ dominating queries", journal = j-VLDB-J, volume = "18", number = "3", pages = "695--718", month = jun, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0117-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:55:19 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The top- k dominating query returns k data objects which dominate the highest number of objects in a dataset. This query is an important tool for decision support since it provides data analysts an intuitive way for finding significant objects. In addition, it combines the advantages of top- k and skyline queries without sharing their disadvantages: (i) the output size can be controlled, (ii) no ranking functions need to be specified by users, and (iii) the result is independent of the scales at different dimensions. Despite their importance, top- k dominating queries have not received adequate attention from the research community. 
This paper is an extensive study on the evaluation of top- k dominating queries. First, we propose a set of algorithms that apply on indexed multi-dimensional data. Second, we investigate query evaluation on data that are not indexed. Finally, we study a relaxed variant of the query which considers dominance in dimensional subspaces. Experiments using synthetic and real datasets demonstrate that our algorithms significantly outperform a previous skyline-based approach. We also illustrate the applicability of this multi-dimensional analysis query by studying the meaningfulness of its results on real data.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Preference dominance; Score counting; Top-k retrieval", } @Article{Silva:2009:RTS, author = "Yasin N. Silva and Xiaopeng Xiong and Walid G. Aref", title = "The {RUM-tree}: supporting frequent updates in {R-trees} using memos", journal = j-VLDB-J, volume = "18", number = "3", pages = "719--738", month = jun, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0120-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:55:19 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The problem of frequently updating multi-dimensional indexes arises in many location-dependent applications. While the R-tree and its variants are the dominant choices for indexing multi-dimensional objects, the R-tree exhibits inferior performance in the presence of frequent updates. In this paper, we present an R-tree variant, termed the RUM-tree (which stands for R-tree with update memo) that reduces the cost of object updates. The RUM-tree processes updates in a memo-based approach that avoids disk accesses for purging old entries during an update process. 
Therefore, the cost of an update operation in the RUM-tree is reduced to the cost of only an insert operation. The removal of old object entries is carried out by a garbage cleaner inside the RUM-tree. In this paper, we present the details of the RUM-tree and study its properties. We also address the issues of crash recovery and concurrency control for the RUM-tree. Theoretical analysis and comprehensive experimental evaluation demonstrate that the RUM-tree outperforms other R-tree variants by up to one order of magnitude in scenarios with frequent updates.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Frequent updates; Indexing techniques; Performance; Spatio-temporal databases", } @Article{Kriakov:2009:STM, author = "Vassil Kriakov and George Kollios and Alex Delis", title = "Self-tuning management of update-intensive multidimensional data in clusters of workstations", journal = j-VLDB-J, volume = "18", number = "3", pages = "739--764", month = jun, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0121-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:55:19 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Contemporary applications continuously modify large volumes of multidimensional data that must be accessed efficiently and, more importantly, must be updated in a timely manner. Single-server storage approaches are insufficient when managing such volumes of data, while the high frequency of data modification render classical indexing methods inefficient. To address these two problems we introduce a distributed storage manager for multidimensional data based on a Cluster-of-Workstations. 
The manager addresses the above challenges through a set of mechanisms that, through selective on-line data reorganization, collectively maintain a balanced load across a cluster of workstations. With the help of both a highly efficient and speedy self-tuning mechanism, based on a new data structure called stat -index, as well as a query aggregation and clustering algorithm, our storage manager attains short query response times even in the presence of massive modifications and highly skewed access patterns. Furthermore, we provide a data migration cost model used to determine the best data redistribution strategy. Through extensive experimentation with our prototype, we establish that our storage manager can sustain significant update rates with minimal overhead.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Cluster of workstations; Multi-dimensional data; Self-tuning storage", } @Article{Cohen:2009:EQS, author = "Sara Cohen", title = "Equivalence of queries that are sensitive to multiplicities", journal = j-VLDB-J, volume = "18", number = "3", pages = "765--785", month = jun, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0122-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:55:19 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The query equivalence problem has been studied extensively for set-semantics and, more recently, for bag and bag-set semantics. However, SQL queries often combine set, bag and bag-set semantics. For example, an SQL query that returns a multiset of elements may call a subquery or view that returns a set of elements. Queries may access both relations that do not contain duplicates, as well as relations with duplicates. 
As another example, in SQL one can compute a multiset-union of queries, each of which returns a set of answers. This paper presents combined semantics, which formally models query evaluation combining set, bag and bag-set semantics. The equivalence problem for queries evaluated under combined semantics is studied. A sufficient condition for equivalence is presented. For several important common classes of queries necessary and sufficient conditions for equivalence are presented.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Bag semantics; Combined semantics; Datalog; Query equivalence; Set semantics", } @Article{Lian:2009:EPP, author = "Xiang Lian and Lei Chen", title = "Efficient processing of probabilistic reverse nearest neighbor queries over uncertain data", journal = j-VLDB-J, volume = "18", number = "3", pages = "787--808", month = jun, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0123-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:55:19 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Reverse nearest neighbor (RNN) search is very crucial in many real applications. In particular, given a database and a query object, an RNN query retrieves all the data objects in the database that have the query object as their nearest neighbors. Often, due to limitation of measurement devices, environmental disturbance, or characteristics of applications (for example, monitoring moving objects), data obtained from the real world are uncertain (imprecise). Therefore, previous approaches proposed for answering an RNN query over exact (precise) database cannot be directly applied to the uncertain scenario. 
In this paper, we re-define the RNN query in the context of uncertain databases, namely probabilistic reverse nearest neighbor (PRNN) query, which obtains data objects with probabilities of being RNNs greater than or equal to a user-specified threshold. Since the retrieval of a PRNN query requires accessing all the objects in the database, which is quite costly, we also propose an effective pruning method, called geometric pruning (GP), that significantly reduces the PRNN search space yet without introducing any false dismissals. Furthermore, we present an efficient PRNN query procedure that seamlessly integrates our pruning method. Extensive experiments have demonstrated the efficiency and effectiveness of our proposed GP-based PRNN query processing approach, under various experimental settings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Geometric pruning; Probabilistic reverse nearest neighbor; Uncertain databases", } @Article{Hua:2009:TTQ, author = "Ming Hua and Jian Pei and Ada W. Fu and Xuemin Lin and Ho-Fung Leung", title = "Top-$k$ typicality queries and efficient query answering methods on large databases", journal = j-VLDB-J, volume = "18", number = "3", pages = "809--835", month = jun, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0128-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:55:19 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Finding typical instances is an effective approach to understand and analyze large data sets. In this paper, we apply the idea of typicality analysis from psychology and cognitive science to database query answering, and study the novel problem of answering top- k typicality queries. 
We model typicality in large data sets systematically. Three types of top- k typicality queries are formulated. To answer questions like 'Who are the top- k most typical NBA players?', the measure of simple typicality is developed. To answer questions like 'Who are the top- k most typical guards distinguishing guards from other players?', the notion of discriminative typicality is proposed. Moreover, to answer questions like 'Who are the best k typical guards in whole representing different types of guards?', the notion of representative typicality is used. Computing the exact answer to a top- k typicality query requires quadratic time which is often too costly for online query answering on large databases. We develop a series of approximation methods for various situations: (1) the randomized tournament algorithm has linear complexity though it does not provide a theoretical guarantee on the quality of the answers; (2) the direct local typicality approximation using VP-trees provides an approximation quality guarantee; (3) a local typicality tree data structure can be exploited to index a large set of objects. Then, typicality queries can be answered efficiently with quality guarantees by a tournament method based on a Local Typicality Tree. An extensive performance study using two real data sets and a series of synthetic data sets clearly shows that top- k typicality queries are meaningful and our methods are practical.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Efficient query answering; Top-k query; Typicality analysis", } @Article{Bawa:2009:PPI, author = "Mayank Bawa and Roberto J. 
{Bayardo, Jr.} and Rakesh Agrawal and Jaideep Vaidya", title = "Privacy-preserving indexing of documents on the network", journal = j-VLDB-J, volume = "18", number = "4", pages = "837--856", month = aug, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0129-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:56:20 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the ubiquitous collection of data and creation of large distributed repositories, enabling search over this data while respecting access control is critical. A related problem is that of ensuring privacy of the content owners while still maintaining an efficient index of distributed content. We address the problem of providing privacy-preserving search over distributed access-controlled content. Indexed documents can be easily reconstructed from conventional (inverted) indexes used in search. Currently, the need to avoid breaches of access-control through the index requires the index hosting site to be fully secured and trusted by all participating content providers. This level of trust is impractical in the increasingly common case where multiple competing organizations or individuals wish to selectively share content. We propose a solution that eliminates the need of such a trusted authority. The solution builds a centralized privacy-preserving index in conjunction with a distributed access-control enforcing search protocol. Two alternative methods to build the centralized index are proposed, allowing trade offs of efficiency and security. The new index provides strong and quantifiable privacy guarantees that hold even if the entire index is made public. Experiments on a real-life dataset validate performance of the scheme. 
The appeal of our solution is twofold: (a) content providers maintain complete control in defining access groups and ensuring its compliance, and (b) system implementors retain tunable knobs to balance privacy and efficiency concerns for their particular domains.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Distributed search; Indexing; Privacy", } @Article{Fan:2009:QTX, author = "Wenfei Fan and Jeffrey Xu Yu and Jianzhong Li and Bolin Ding and Lu Qin", title = "Query translation from {XPath} to {SQL} in the presence of recursive {DTDs}", journal = j-VLDB-J, volume = "18", number = "4", pages = "857--883", month = aug, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-008-0131-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:56:20 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We study the problem of evaluating xpath queries over xml data that is stored in an rdbms via schema-based shredding. The interaction between recursion (descendants-axis) in xpath queries and recursion in dtds makes it challenging to answer xpath queries using rdbms. We present a new approach to translating xpath queries into sql queries based on a notion of extended\par XP ath expressions and a simple least fixpoint (lfp) operator. Extended xpath expressions are a mild extension of xpath, and the lfp operator takes a single input relation and is already supported by most commercial rdbms. We show that extended xpath expressions are capable of capturing both dtd recursion and xpath queries in a uniform framework. Furthermore, they can be translated into an equivalent sequence of sql queries with the lfp operator. 
We present algorithms for rewriting xpath queries over a (possibly recursive) dtd into extended xpath expressions and for translating extended xpath expressions to sql queries, as well as optimization techniques. The novelty of our approach consists in its capability to answer a large class of xpath queries by means of only low-end rdbms features already available in most rdbms, as well as its flexibility to accommodate existing relational query optimization techniques. In addition, these translation algorithms provide a solution to query answering for certain (possibly recursive) xml views of xml data. Our experimental results verify the effectiveness of our techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Query translation; Recursive DTD; SQL; XML database; XPath", } @Article{Malik:2009:RRA, author = "Zaki Malik and Athman Bouguettaya", title = "{RATEWeb}: {Reputation Assessment} for {Trust Establishment} among {Web} services", journal = j-VLDB-J, volume = "18", number = "4", pages = "885--911", month = aug, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-009-0138-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:56:20 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We introduce RATEWeb, a framework for establishing trust in service-oriented environments. RATEWeb supports a cooperative model in which Web services share their experiences of the service providers with their peers through feedback ratings. The different ratings are aggregated to derive a service provider's reputation. This in turn is used to evaluate trust. The overall goal of RATEWeb is to facilitate trust-based selection and composition of Web services. 
We propose a set of decentralized techniques that aim at accurately aggregating the submitted ratings for reputation assessment. We conduct experiments to assess the fairness and accuracy of the proposed techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Reputation; Trust; Web service", } @Article{Wang:2009:CRE, author = "Fusheng Wang and Shaorong Liu and Peiya Liu", title = "Complex {RFID} event processing", journal = j-VLDB-J, volume = "18", number = "4", pages = "913--931", month = aug, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-009-0139-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:56:20 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Advances of sensor and radio frequency identification (RFID) technology provide significant new power for humans to sense, understand and manage the world. RFID provides fast data collection with precise identification of objects with unique IDs without line of sight, thus it can be used for identifying, locating, tracking and monitoring physical objects. Despite these benefits, RFID poses many challenges for data processing and management: (i) RFID observations have implicit meanings, which have to be transformed and aggregated into semantic data represented in their data models; and (ii) RFID data are temporal, streaming, and in high volume, and have to be processed on the fly. Thus, a general RFID data processing framework is needed to automate the transformation of physical RFID observations into the virtual counterparts in the virtual world linked to business applications. In this paper, we take an event-oriented approach to process RFID data, by devising RFID application logic into complex events. 
We then formalize the specification and semantics of RFID events and rules. We discover that RFID events are highly temporal constrained, and include non-spontaneous events, and develop an RFID event detection engine that can effectively process complex RFID events. The declarative event-based approach greatly simplifies the work of RFID data processing, and can significantly reduce the cost of RFID data integration.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Complex event; ECA rules; RFID; Temporal", } @Article{DuMouza:2009:LSI, author = "C{\'e}dric {Du Mouza} and Witold Litwin and Philippe Rigaux", title = "Large-scale indexing of spatial data in distributed repositories: the {SD}-Rtree", journal = j-VLDB-J, volume = "18", number = "4", pages = "933--958", month = aug, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-009-0135-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:56:20 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We propose a scalable distributed data structure (SDDS) called SD-Rtree. We intend our structure for point, window and k NN queries over large spatial datasets distributed on clusters of interconnected servers. The structure balances the storage and processing load over the available resources, and aims at minimizing the size of the cluster. SD-Rtree generalizes the well-known Rtree structure. It uses a distributed balanced binary tree that scales with insertions to potentially any number of storage servers through splits of the overloaded ones. A user/application manipulates the structure from a client node. The client addresses the tree through its image that can be possibly outdated due to later split. 
This may generate addressing errors, solved by the forwarding among the servers. Specific messages towards the clients incrementally correct the outdated images. We present the building of an SD-Rtree through insertions, focusing on the split and rotation algorithms. We follow with the query algorithms. We describe then a flexible allocation protocol which allows to cope with a temporary shortage of storage resources through data storage balancing. Experiments show additional aspects of SD-Rtree and compare its behavior with a distributed quadtree. The results justify our various design choices and the overall utility of the structure.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Distributed structure; Spatial indexing", } @Article{Zheng:2009:DSI, author = "Baihua Zheng and Wang-Chien Lee and Ken C. Lee and Dik Lun Lee and Min Shao", title = "A distributed spatial index for error-prone wireless data broadcast", journal = j-VLDB-J, volume = "18", number = "4", pages = "959--986", month = aug, year = "2009", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-009-0137-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Sep 15 14:56:20 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Information is valuable to users when it is available not only at the right time but also at the right place. To support efficient location-based data access in wireless data broadcast systems, a distributed spatial index (called DSI) is presented in this paper. DSI is highly efficient because it has a linear yet fully distributed structure that naturally shares links in different search paths. 
DSI is very resilient to the error-prone wireless communication environment because interrupted search operations based on DSI can be resumed easily. It supports search algorithms for classical location-based queries such as window queries and k NN queries in both of the snapshot and continuous query modes. In-depth analysis and simulation-based evaluation have been conducted. The results show that DSI significantly out-performs a variant of R-trees tailored for wireless data broadcast environments.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Error resilience; Location-based query; Mobile computing; Wireless broadcast", } @Article{Haas:2009:SIU, author = "Peter J. Haas and Dan Suciu", title = "Special issue on uncertain and probabilistic databases", journal = j-VLDB-J, volume = "18", number = "5", pages = "987--988", month = oct, year = "2009", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 16 08:21:40 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sarma:2009:RUD, author = "Anish Das Sarma and Omar Benjelloun and Alon Halevy and Shubha Nabar and Jennifer Widom", title = "Representing uncertain data: models, properties, and algorithms", journal = j-VLDB-J, volume = "18", number = "5", pages = "989--1019", month = oct, year = "2009", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 16 08:21:40 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = 
"VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Antova:2009:WBE, author = "Lyublena Antova and Christoph Koch and Dan Olteanu", title = "$ 10^{(10^6)} $ worlds and beyond: efficient representation and processing of incomplete information", journal = j-VLDB-J, volume = "18", number = "5", pages = "1021--1040", month = oct, year = "2009", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 16 08:21:40 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Abiteboul:2009:EPX, author = "Serge Abiteboul and Benny Kimelfeld and Yehoshua Sagiv and Pierre Senellart", title = "On the expressiveness of probabilistic {XML} models", journal = j-VLDB-J, volume = "18", number = "5", pages = "1041--1064", month = oct, year = "2009", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 16 08:21:40 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sen:2009:PME, author = "Prithviraj Sen and Amol Deshpande and Lise Getoor", title = "{PrDB}: managing and exploiting rich correlations in probabilistic databases", journal = j-VLDB-J, volume = "18", number = "5", pages = "1065--1090", month = oct, year = "2009", CODEN = "VLDBFR", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 16 08:21:40 MDT 2010", bibsource = "http://portal.acm.org/; 
https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Re:2009:THQ,
  author =       "Christopher R{\'e} and Dan Suciu",
  title =        "The trichotomy of {HAVING} queries on a probabilistic database",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1091--1116",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Kimelfeld:2009:QEP,
  author =       "Benny Kimelfeld and Yuri Kosharovsky and Yehoshua Sagiv",
  title =        "Query evaluation over probabilistic {XML}",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1117--1140",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Hassanzadeh:2009:CPD,
  author =       "Oktie Hassanzadeh and Ren{\'e}e J.
Miller",
  title =        "Creating probabilistic databases from duplicated data",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1141--1166",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Wolf:2009:QPI,
  author =       "Garrett Wolf and Aravind Kalavagattu and Hemal Khatri and Raju Balakrishnan and Bhaumik Chokshi and Jianchun Fan and Yi Chen and Subbarao Kambhampati",
  title =        "Query processing over incomplete autonomous databases: query rewriting using learned data dependencies",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1167--1190",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Keulen:2009:QEK,
  author =       "Maurice van Keulen and Ander de Keijzer",
  title =        "Qualitative effects of knowledge rules and user feedback in probabilistic data integration",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1191--1217",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Chen:2009:SPS,
  author =       "Jinchuan Chen and Reynold Cheng and Mohamed Mokbel and Chi-Yin Chow",
  title =        "Scalable processing of snapshot and continuous nearest-neighbor queries over one-dimensional uncertain data",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1219--1240",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Chen:2010:TFD,
  author =       "Keke Chen and Ling Liu",
  title =        "{HE-Tree}: a framework for detecting changes in clustering structure for categorical data streams",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "6",
  pages =        "1241--1260",
  month =        dec,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:44 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Whang:2010:GER,
  author =       "Steven Euijong Whang and Omar Benjelloun and Hector Garcia-Molina",
  title =        "Generic entity resolution with negative rules",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "6",
  pages =        "1261--1277",
  month =        dec,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:44 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB
J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Ntarmos:2010:SSI,
  author =       "Nikos Ntarmos and Peter Triantafillou and Gerhard Weikum",
  title =        "Statistical structures for {Internet}-scale data management",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "6",
  pages =        "1279--1312",
  month =        dec,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:44 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Bramandia:2010:OUR,
  author =       "Ramadhana Bramandia and Jiefeng Cheng and Byron Choi and Jeffrey Xu Yu",
  title =        "Optimizing updates of recursive {XML} views of relations",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "6",
  pages =        "1313--1333",
  month =        dec,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:44 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Duntgen:2010:BBM,
  author =       "Christian D{\"u}ntgen and Thomas Behr and Ralf Hartmut G{\"u}ting",
  title =        "{BerlinMOD}: a benchmark for moving object databases",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "6",
  pages =        "1335--1368",
  month =        dec,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:44 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Mandreoli:2010:PHS,
  author =       "Federica Mandreoli and Riccardo Martoglia and Pavel Zezula",
  title =        "Principles of {Holism} for sequential twig pattern matching",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "6",
  pages =        "1369--1392",
  month =        dec,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:44 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Buneman:2010:SIB,
  author =       "Peter Buneman and Volker Markl and Beng Chin Ooi and Kenneth Ross",
  title =        "Special issue: best papers of {VLDB 2008}",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "1--2",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Cormode:2010:MFF,
  author =       "Graham Cormode and Marios Hadjieleftheriou",
  title =        "Methods for finding frequent items in data streams",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "3--20",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement =
ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Bruno:2010:CPD,
  author =       "Nicolas Bruno and Surajit Chaudhuri",
  title =        "Constrained physical design tuning",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "21--44",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Lizorkin:2010:AEO,
  author =       "Dmitry Lizorkin and Pavel Velikhov and Maxim Grinev and Denis Turdakov",
  title =        "Accuracy estimate and optimization techniques for {SimRank} computation",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "45--66",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Nath:2010:OMV,
  author =       "Suman Nath and Phillip B.
Gibbons",
  title =        "Online maintenance of very large random samples on flash storage",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "67--90",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Neumann:2010:RES,
  author =       "Thomas Neumann and Gerhard Weikum",
  title =        "The {RDF-3X} engine for scalable management of {RDF} data",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "91--113",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Cormode:2010:ABG,
  author =       "Graham Cormode and Divesh Srivastava and Ting Yu and Qing Zhang",
  title =        "Anonymizing bipartite graph data using safe groupings",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "115--139",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{U:2010:CSA,
  author =       "Leong Hou U
and Kyriakos Mouratidis and Nikos Mamoulis",
  title =        "Continuous spatial assignment of moving users",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "141--160",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0144-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Consider a set of servers and a set of users, where each server has a coverage region (i.e., an area of service) and a capacity (i.e., a maximum number of users it can serve). Our task is to assign every user to one server subject to the coverage and capacity constraints. To offer the highest quality of service, we wish to minimize the average distance between users and their assigned server. This is an instance of a well-studied problem in operations research, termed optimal assignment. Even though there exist several solutions for the static case (where user locations are fixed), there is currently no method for dynamic settings. In this paper, we consider the continuous assignment problem (CAP), where an optimal assignment must be constantly maintained between mobile users and a set of servers. The fact that the users are mobile necessitates real-time reassignment so that the quality of service remains high (i.e., their distance from their assigned servers is minimized). The large scale and the time-critical nature of targeted applications require fast CAP solutions. We propose an algorithm that utilizes the geometric characteristics of the problem and significantly accelerates the initial assignment computation and its subsequent maintenance.
Our method applies to different cost functions (e.g., average squared distance) and to any Minkowski distance metric (e.g., Euclidean, L$_1$ norm, etc.).",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "Continuous query; Optimal assignment; Spatial monitoring",
}

@Article{Papadopoulos:2010:CAR,
  author =       "Stavros Papadopoulos and Yin Yang and Dimitris Papadias",
  title =        "Continuous authentication on relational streams",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "161--180",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0145-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "According to the database outsourcing model, a data owner delegates database functionality to a third-party service provider, which answers queries received from clients. Authenticated query processing enables the clients to verify the correctness of query results. Despite the abundance of methods for authenticated processing in conventional databases, there is limited work on outsourced data streams. Stream environments pose new challenges such as the need for fast structure updating, support for continuous query processing and authentication, and provision for temporal completeness. Specifically, in addition to the correctness of individual results, the client must be able to verify that there are no missing results in between data updates. This paper presents a comprehensive set of methods covering relational streams.
We first describe REF, a technique that achieves correctness and temporal completeness but incurs false transmissions, i.e., the provider has to inform the clients whenever there is a data update, even if their results are not affected. Then, we propose CADS, which minimizes the processing and transmission overhead through an elaborate indexing scheme and a virtual caching mechanism. In addition, we present an analytical study to determine the optimal indexing granularity, and extend CADS for the case that the data distribution changes over time. Finally, we evaluate the effectiveness of our techniques through extensive experiments.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "Authentication; Continuous monitoring; Data streams; Database outsourcing",
}

@Article{Zhang:2010:UMS,
  author =       "Zhenjie Zhang and Hua Lu and Beng Chin Ooi and Anthony K. H. Tung",
  title =        "Understanding the meaning of a shifted sky: a general framework on extending skyline query",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "181--201",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0148-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Skyline queries are often used on data sets in multi-dimensional space for many decision-making applications. Traditionally, an object p is said to dominate another object q if, for all dimensions, it is no worse than q and is better on at least one dimension. Therefore, the skyline of a data set consists of all objects not dominated by any other object.
To better cater to application requirements such as controlling the size of the skyline or handling data sets that are not well-structured, various works have been proposed to extend the definition of skyline based on variants of the dominance relationship. In view of the proliferation of variants, in this paper, a generalized framework is proposed to guide the extension of skyline query from conventional definition to different variants. Our framework explicitly and carefully examines the various properties that should be preserved in a variant of the dominance relationship so that: (1) maintaining original advantages, while extending adaptivity to application semantics, and (2) keeping computational complexity almost unaffected. We prove that traditional dominance is the only relationship satisfying all desirable properties, and present some new dominance relationships by relaxing some of the properties. These relationships are general enough for us to design new top-$k$ skyline queries that return robust results of a controllable size. We analyze the existing skyline algorithms based on their minimum requirements on dominance properties. We also extend our analysis to data sets with missing values, and present extensive experimental results on the combinations of new dominance relationships and skyline algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "General framework; Skyline query",
}

@Article{Lo:2010:FTD,
  author =       "Eric Lo and Carsten Binnig and Donald Kossmann and M.
Tamer {\"O}zsu and Wing-Kai Hon",
  title =        "A framework for testing {DBMS} features",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "203--230",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0157-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Testing a specific feature of a DBMS requires controlling the inputs and outputs of the operators in the query execution plan. However, that is practically difficult to achieve because the inputs/outputs of a query depend on the content of the test database. In this paper, we propose a framework to test DBMS features. The framework includes a database generator called QAGen so that the generated test databases are able to meet the test requirements defined on the test queries. The framework also includes a set of tools to automate test case constructions and test executions. A wide range of DBMS feature testing tasks can be facilitated by the proposed framework.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "Data generation; Database testing; Symbolic execution; Symbolic query processing",
}

@Article{Bonifati:2010:SMQ,
  author =       "Angela Bonifati and Elaine Chang and Terence Ho and Laks V.
S. Lakshmanan and Rachel Pottinger and Yongik Chung",
  title =        "Schema mapping and query translation in heterogeneous {P2P XML} databases",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "231--256",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0159-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Peers in a peer-to-peer data management system often have heterogeneous schemas and no mediated global schema. To translate queries across peers, we assume each peer provides correspondences between its schema and a small number of other peer schemas. We focus on query reformulation in the presence of heterogeneous XML schemas, including data--metadata conflicts. We develop an algorithm for inferring precise mapping rules from informal schema correspondences. We define the semantics of query answering in this setting and develop query translation algorithm. Our translation handles an expressive fragment of XQuery and works both along and against the direction of mapping rules. We describe the HePToX heterogeneous P2P XML data management system which incorporates our results. We report the results of extensive experiments on HePToX on both synthetic and real datasets.
We demonstrate our system utility and scalability on different P2P distributions.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "Heterogeneous Peer-to-Peer XML databases; Schema mapping; XML query translation",
}

@Article{Morfonios:2010:RCL,
  author =       "Konstantinos Morfonios and Yannis Ioannidis",
  title =        "Revisiting the cube lifecycle in the presence of hierarchies",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "257--282",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0160-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "On-line analytical processing (OLAP) typically involves complex aggregate queries over large datasets. The data cube has been proposed as a structure that materializes the results of such queries in order to accelerate OLAP. A significant fraction of the related work has been on Relational-OLAP (ROLAP) techniques, which are based on relational technology. Existing ROLAP cubing solutions mainly focus on 'flat' datasets, which do not include hierarchies in their dimensions. Nevertheless, as shown in this paper, the nature of hierarchies introduces several complications into the entire lifecycle of a data cube including the operations of construction, storage, indexing, query processing, and incremental maintenance. This fact renders existing techniques essentially inapplicable in a significant number of real-world applications and mandates revisiting the entire cube lifecycle under the new perspective.
In order to overcome this problem, the CURE algorithm has been recently proposed as an efficient mechanism to construct complete cubes over large datasets with arbitrary hierarchies and store them in a highly compressed format, compatible with the relational model. In this paper, we study the remaining phases in the cube lifecycle and introduce query-processing and incremental-maintenance algorithms for CURE cubes. These are significantly different from earlier approaches, which have been proposed for flat cubes constructed by other techniques and are inadequate for CURE due to its high compression rate and the presence of hierarchies. Our methods address issues such as cube indexing, query optimization, and lazy update policies. Especially regarding updates, such lazy approaches are applied for the first time on cubes. We demonstrate the effectiveness of CURE in all phases of the cube lifecycle through experiments on both real-world and synthetic datasets. Among the experimental results, we distinguish those that have made CURE the first ROLAP technique to complete the construction and usage of the cube of the highest-density dataset in the APB-1 benchmark (12 GB). 
CURE was in fact quite efficient on this, showing great promise with respect to the potential of the technique overall.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "Data cube; Incremental maintenance; Lazy update; Query processing",
}

@Article{Zhang:2010:TBP,
  author =       "Wenjie Zhang and Xuemin Lin and Ying Zhang and Jian Pei and Wei Wang",
  title =        "Threshold-based probabilistic top-$k$ dominating queries",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "283--305",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0162-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Recently, due to intrinsic characteristics in many underlying data sets, a number of probabilistic queries on uncertain data have been investigated. Top-$k$ dominating queries are very important in many applications including decision making in a multidimensional space. In this paper, we study the problem of efficiently computing top-$k$ dominating queries on uncertain data. We first formally define the problem. Then, we develop an efficient, threshold-based algorithm to compute the exact solution. To overcome some inherent computational deficiency in an exact computation, we develop an efficient randomized algorithm with an accuracy guarantee. Our extensive experiments demonstrate that both algorithms are quite efficient, while the randomized algorithm is quite scalable against data set sizes, object areas, $k$ values, etc.
The randomized algorithm is also highly accurate in practice.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "Dominating relation; Top $k$; Uncertain objects",
}

@Article{Nutanong:2010:AEV,
  author =       "Sarana Nutanong and Rui Zhang and Egemen Tanin and Lars Kulik",
  title =        "Analysis and evaluation of {V*-kNN}: an efficient algorithm for moving {kNN} queries",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "3",
  pages =        "307--332",
  month =        jun,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0163-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:05:52 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The moving $k$ nearest neighbor (M$k$NN) query continuously finds the $k$ nearest neighbors of a moving query point. M$k$NN queries can be efficiently processed through the use of safe regions. In general, a safe region is a region within which the query point can move without changing the query answer. This paper presents an incremental safe-region-based technique for answering M$k$NN queries, called the V*-Diagram, as well as analysis and evaluation of its associated algorithm, V*-kNN. Traditional safe-region approaches compute a safe region based on the data objects but independent of the query location. Our approach exploits the knowledge of the query location and the boundary of the search space in addition to the data objects. As a result, V*-kNN has much smaller I/O and computation costs than existing methods. We further provide cost models to estimate the number of data accesses for V*-kNN and a competitive technique, RIS-kNN. The V*-Diagram and V*-kNN are also applicable to the domain of spatial networks and we present algorithms to construct a spatial-network V*-Diagram.
Our experimental results show that V*-kNN significantly outperforms the competitive technique. The results also verify the accuracy of the cost models.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "Nearest neighbor search; Spatial databases",
}

@Article{Lee:2010:ZSE,
  author =       "Ken C. K. Lee and Wang-Chien Lee and Baihua Zheng and Huajing Li and Yuan Tian",
  title =        "{Z-SKY}: an efficient skyline query processing framework based on {Z}-order",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "3",
  pages =        "333--362",
  month =        jun,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0166-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:05:52 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given a set of data points in a multidimensional space, a skyline query retrieves those data points that are not dominated by any other point in the same dataset. Observing that the properties of Z-order space filling curves (or Z-order curves) perfectly match with the dominance relationships among data points in a geometrical data space, we, in this paper, develop and present a novel and efficient processing framework to evaluate skyline queries and their variants, and to support skyline result updates based on Z-order curves.
This framework consists of ZBtree, i.e., an index structure to organize a source dataset and skyline candidates, and a suite of algorithms, namely, (1) ZSearch, which processes skyline queries, (2) ZInsert, ZDelete and ZUpdate, which incrementally maintain skyline results in presence of source dataset updates, (3) ZBand, which answers skyband queries, (4) ZRank, which returns top-ranked skyline points, (5) k-ZSearch, which evaluates $k$-dominant skyline queries, and (6) ZSubspace, which supports skyline queries on a subset of dimensions. While derived upon coherent ideas and concepts, our approaches are shown to outperform the state-of-the-art algorithms that are specialized to address particular skyline problems, especially when a large number of skyline points are resulted, via comprehensive experiments.",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  keywords =     "Index; Search algorithm; Skyline query; Skyline query result update; Z-order space filling curve",
}

@Article{Yiu:2010:ESS,
  author =       "Man Lung Yiu and Gabriel Ghinita and Christian S. Jensen and Panos Kalnis",
  title =        "Enabling search services on outsourced private spatial data",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "3",
  pages =        "363--384",
  month =        jun,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0169-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:05:52 MDT 2010",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Cloud computing services enable organizations and individuals to outsource the management of their data to a service provider in order to save on hardware investments and reduce maintenance costs. Only authorized users are allowed to access the data.
Nobody else, including the service provider, should be able to view the data. For instance, a real-estate company that owns a large database of properties wants to allow its paying customers to query for houses according to location. On the other hand, the untrusted service provider should not be able to learn the property locations and, e.g., selling the information to a competitor. To tackle the problem, we propose to transform the location datasets before uploading them to the service provider. The paper develops a spatial transformation that re-distributes the locations in space, and it also proposes a cryptographic-based transformation. The data owner selects the transformation key and shares it with authorized users. Without the key, it is infeasible to reconstruct the original data points from the transformed points. The proposed transformations present distinct trade-offs between query efficiency and data confidentiality. In addition, we describe attack models for studying the security properties of the transformations. 
Empirical studies demonstrate that the proposed methods are efficient and applicable in practice.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Data outsourcing; Spatial query processing", } @Article{Hintoglu:2010:SMP, author = "Ay{\c{c}}a Azgin Hintoglu and Y{\"u}cel Sayg{\i}n", title = "Suppressing microdata to prevent classification based inference", journal = j-VLDB-J, volume = "19", number = "3", pages = "385--410", month = jun, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-009-0170-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Aug 18 12:05:52 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The revolution of the Internet together with the progression in computer technology makes it easy for institutions to collect an unprecedented amount of personal data. This pervasive data collection rally coupled with the increasing necessity of dissemination and sharing of non-aggregated data, i.e., microdata, raised a lot of concerns about privacy. One method to ensure privacy is to selectively hide the confidential, i.e. sensitive, information before disclosure. However, with data mining techniques, it is now possible for an adversary to predict the hidden confidential information from the disclosed data sets. In this paper, we concentrate on one such data mining technique called classification. We extend our previous work on microdata suppression to prevent both probabilistic and decision tree classification based inference. 
We also provide experimental results showing the effectiveness of not only the proposed methods but also the hybrid methods, i.e., methods suppressing microdata against both classification models, on real-life data sets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Data mining; Data perturbation; Data suppression; Disclosure protection; Privacy", } @Article{Jin:2010:SWT, author = "Cheqing Jin and Ke Yi and Lei Chen and Jeffrey Xu Yu and Xuemin Lin", title = "Sliding-window top-$k$ queries on uncertain streams", journal = j-VLDB-J, volume = "19", number = "3", pages = "411--435", month = jun, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-009-0171-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Aug 18 12:05:52 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Recently, due to the imprecise nature of the data generated from a variety of streaming applications, such as sensor networks, query processing on uncertain data streams has become an important problem. However, all the existing works on uncertain data streams study unbounded streams. In this paper, we take the first step towards the important and challenging problem of answering sliding-window queries on uncertain data streams, with a focus on one of the most important types of queries--top-$k$ queries. It is nontrivial to find an efficient solution for answering sliding-window top-$k$ queries on uncertain data streams, because challenges not only stem from the strict space and time requirements of processing both arriving and expiring tuples in high-speed streams, but also rise from the exponential blowup in the number of possible worlds induced by the uncertain data model. 
In this paper, we design a unified framework for processing sliding-window top-$k$ queries on uncertain streams. We show that all the existing top-$k$ definitions in the literature can be plugged into our framework, resulting in several succinct synopses that use space much smaller than the window size, while they are also highly efficient in terms of processing time. We also extend our framework to answering multiple top-$k$ queries. In addition to the theoretical space and time bounds that we prove for these synopses, we present a thorough experimental report to verify their practical efficiency on both synthetic and real data.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Sliding-window; Top-k query; Uncertain stream", } @Article{Pang:2010:EPE, author = "Hweehwa Pang and Xuhua Ding and Baihua Zheng", title = "Efficient processing of exact top-$k$ queries over disk-resident sorted lists", journal = j-VLDB-J, volume = "19", number = "3", pages = "437--456", month = jun, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-009-0174-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Aug 18 12:05:52 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The top-$k$ query is employed in a wide range of applications to generate a ranked list of data that have the highest aggregate scores over certain attributes. As the pool of attributes for selection by individual queries may be large, the data are indexed with per-attribute sorted lists, and a threshold algorithm (TA) is applied on the lists involved in each query. The TA executes in two phases--find a cut-off threshold for the top-$k$ result scores, then evaluate all the records that could score above the threshold. 
In this paper, we focus on exact top-$k$ queries that involve monotonic linear scoring functions over disk-resident sorted lists. We introduce a model for estimating the depths to which each sorted list needs to be processed in the two phases, so that (most of) the required records can be fetched efficiently through sequential or batched I/Os. We also devise a mechanism to quickly rank the data that qualify for the query answer and to eliminate those that do not, in order to reduce the computation demand of the query processor. Extensive experiments with four different datasets confirm that our schemes achieve substantial performance speed-up of between two times and two orders of magnitude over existing TAs, at the expense of a memory overhead of 4.8 bits per attribute value. Moreover, our scheme is robust to different data distributions and query characteristics.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Bloom filter; Threshold algorithm; Top-k query processing", } @Article{Murugesan:2010:EPP, author = "Mummoorthy Murugesan and Wei Jiang and Chris Clifton and Luo Si and Jaideep Vaidya", title = "Efficient privacy-preserving similar document detection", journal = j-VLDB-J, volume = "19", number = "4", pages = "457--475", month = aug, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-009-0175-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Aug 18 12:06:22 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Similar document detection plays important roles in many applications, such as file management, copyright protection, plagiarism prevention, and duplicate submission detection. 
The state of the art protocols assume that the contents of files stored on a server (or multiple servers) are directly accessible. However, this makes such protocols unsuitable for any environment where the documents themselves are sensitive and cannot be openly read. Essentially, this assumption limits more practical applications, e.g., detecting plagiarized documents between two conferences, where submissions are confidential. We propose novel protocols to detect similar documents between two entities where documents cannot be openly shared with each other. The similarity measure used can be a simple cosine similarity on entire documents or on document fragments, enabling detection of partial copying. We conduct extensive experiments to show the practical value of the proposed protocols. While the proposed base protocols are much more efficient than the general secure multiparty computation based solutions, they are still slow for large document sets. We then investigate a clustering based approach that significantly reduces the running time and achieves over 90\% of accuracy in our experiments. This makes secure similar document detection both practical and feasible.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Information retrieval; Privacy", } @Article{Soliman:2010:SRQ, author = "Mohamed A. Soliman and Ihab F. 
Ilyas and Shalev Ben-David", title = "Supporting ranking queries on uncertain and incomplete data", journal = j-VLDB-J, volume = "19", number = "4", pages = "477--501", month = aug, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-009-0176-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Aug 18 12:06:22 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Large databases with uncertain information are becoming more common in many applications including data integration, location tracking, and Web search. In these applications, ranking records with uncertain attributes introduces new problems that are fundamentally different from conventional ranking. Specifically, uncertainty in records' scores induces a partial order over records, as opposed to the total order that is assumed in the conventional ranking settings. In this paper, we present a new probabilistic model, based on partial orders, to encapsulate the space of possible rankings originating from score uncertainty. Under this model, we formulate several ranking query types with different semantics. We describe and analyze a set of efficient query evaluation algorithms. We show that our techniques can be used to solve the problem of rank aggregation in partial orders under two widely adopted distance metrics. In addition, we design sampling techniques based on Markov chains to compute approximate query answers. Our experimental evaluation uses both real and synthetic data. 
The experimental study demonstrates the efficiency and effectiveness of our techniques under various configurations.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Kendall tau; Partial orders; Probabilistic data; Rank aggregation; Ranking; Top-k; Uncertain data", } @Article{Lee:2010:SCE, author = "Ki-Hoon Lee and Kyu-Young Whang and Wook-Shin Han and Min-Soo Kim", title = "Structural consistency: enabling {XML} keyword search to eliminate spurious results consistently", journal = j-VLDB-J, volume = "19", number = "4", pages = "503--529", month = aug, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-009-0177-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Aug 18 12:06:22 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "XML keyword search is a user-friendly way to query XML data using only keywords. In XML keyword search, to achieve high precision without sacrificing recall, it is important to remove spurious results not intended by the user. Efforts to eliminate spurious results have enjoyed some success using the concepts of LCA or its variants, SLCA and MLCA. However, existing methods still could find many spurious results. The fundamental cause for the occurrence of spurious results is that the existing methods try to eliminate spurious results locally without global examination of all the query results and, accordingly, some spurious results are not consistently eliminated. In this paper, we propose a novel keyword search method that removes spurious results consistently by exploiting the new concept of structural consistency. 
We define structural consistency as a property that is preserved if there is no query result having an ancestor-descendant relationship at the schema level with any other query results. A naive solution to obtain structural consistency would be to compute all the LCAs (or variants) and then to remove spurious results according to structural consistency. Obviously, this approach would always be slower than existing LCA-based ones. To speed up structural consistency checking, we must be able to examine the query results at the schema level without generating all the LCAs. However, this is a challenging problem since the schema-level query results do not homomorphically map to the instance-level query results, causing serious false dismissal. We present a comprehensive and practical solution to this problem and formally prove that this solution preserves structural consistency at the schema level without incurring false dismissal. We also propose a relevance-feedback-based solution for the problem where our method has low recall, which occurs when it is not the user's intention to find more specific results. This solution has been prototyped in a full-fledged object-relational DBMS Odysseus developed at KAIST. Experimental results using real and synthetic data sets show that, compared with the state-of-the-art methods, our solution significantly (1) improves precision while providing comparable recall for most queries and (2) enhances the query performance by removing spurious results early.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Keyword search; Odysseus DBMS; Spurious results; Structural consistency; Structural summary; XML", } @Article{Lucchese:2010:RPT, author = "Claudio Lucchese and Michail Vlachos and Deepak Rajan and Philip S. 
Yu", title = "Rights protection of trajectory datasets with nearest-neighbor preservation", journal = j-VLDB-J, volume = "19", number = "4", pages = "531--556", month = aug, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0178-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Aug 18 12:06:22 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Companies frequently outsource datasets to mining firms, and academic institutions create repositories or share datasets in the interest of promoting research collaboration. Still, many practitioners have reservations about sharing or outsourcing datasets, primarily because of fear of losing the principal rights over the dataset. This work presents a way of convincingly claiming ownership rights over a trajectory dataset, without, at the same time, destroying the salient dataset characteristics, which are important for accurate search operations and data-mining tasks. The digital watermarking methodology that we present distorts imperceptibly a collection of sequences, effectively embedding a secret key, while retaining as well as possible the neighborhood of each object, which is vital for operations such as similarity search, classification, or clustering. A key contribution in this methodology is a technique for discovering the maximum distortion that still maintains such desirable properties. 
We demonstrate both analytically and empirically that the proposed dataset marking techniques can withstand a number of attacks (such as translation, rotation, noise addition, etc.) and therefore can provide a robust framework for facilitating the secure dissemination of trajectory datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Nearest neighbors; Rights protection; Time-series; Trajectories; Watermarking", } @Article{Zhang:2010:SMA, author = "Rui Zhang and Nick Koudas and Beng Chin Ooi and Divesh Srivastava and Pu Zhou", title = "Streaming multiple aggregations using phantoms", journal = j-VLDB-J, volume = "19", number = "4", pages = "557--583", month = aug, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0180-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Aug 18 12:06:22 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Data streams characterize the high speed and large volume input of a new class of applications such as network monitoring, web content analysis and sensor networks. Among these applications, network monitoring may be the most compelling one--the backbone of a large internet service provider can generate 1 petabyte of data per day. For many network monitoring tasks such as traffic analysis and statistics collection, aggregation is a primitive operation. Various analytical and statistical needs naturally lead to related aggregate queries. In this article, we address the problem of efficiently computing multiple aggregations over high-speed data streams based on the two-level query processing architecture of GS, a real data stream management system deployed in AT\&T. 
We discern that additionally computing and maintaining fine-granularity aggregations (called phantoms) has the benefit of supporting shared computation. Based on a thorough analysis, we propose algorithms to identify the best set of phantoms to maintain and determine allocation of resources (particularly, space) to compute the aggregations. Experiments show that our algorithm achieves near-optimal computation costs, which outperforms the best adapted algorithm by more than an order of magnitude.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Aggregation; Data stream; GS; Multiple-query optimization; Phantom", } @Article{Jeung:2010:PPP, author = "Hoyoung Jeung and Man Lung Yiu and Xiaofang Zhou and Christian S. Jensen", title = "Path prediction and predictive range querying in road network databases", journal = j-VLDB-J, volume = "19", number = "4", pages = "585--602", month = aug, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0181-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Aug 18 12:06:22 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In automotive applications, movement-path prediction enables the delivery of predictive and relevant services to drivers, e.g., reporting traffic conditions and gas stations along the route ahead. Path prediction also enables better results of predictive range queries and reduces the location update frequency in vehicle tracking while preserving accuracy. Existing moving-object location prediction techniques in spatial-network settings largely target short-term prediction that does not extend beyond the next road junction. 
To go beyond short-term prediction, we formulate a network mobility model that offers a concise representation of mobility statistics extracted from massive collections of historical object trajectories. The model aims to capture the turning patterns at junctions and the travel speeds on road segments at the level of individual objects. Based on the mobility model, we present a maximum likelihood and a greedy algorithm for predicting the travel path of an object (for a time duration h into the future). We also present a novel and efficient server-side indexing scheme that supports predictive range queries on the mobility statistics of the objects. Empirical studies with real data suggest that our proposals are effective and efficient.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "Mobility statistics; Path prediction; Predictive range query; Road network database", } @Article{Ali:2010:MAA, author = "Mohammed Eunus Ali and Egemen Tanin and Rui Zhang and Lars Kulik", title = "A motion-aware approach for efficient evaluation of continuous queries on {$3$D} object databases", journal = j-VLDB-J, volume = "19", number = "5", pages = "603--632", month = oct, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1145/1873117.1873119", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Oct 29 17:56:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Askitis:2010:ESC, author = "Nikolas Askitis and Ranjan Sinha", title = "Engineering scalable, cache and space efficient tries for strings", journal = j-VLDB-J, volume = "19", number = "5", pages = 
"633--660", month = oct, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1145/1873117.1873121", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Oct 29 17:56:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wu:2010:EEG, author = "Xiaoying Wu and Dimitri Theodoratos and Calisto Zuzarte", title = "Efficient evaluation of generalized tree-pattern queries on {XML} streams", journal = j-VLDB-J, volume = "19", number = "5", pages = "661--686", month = oct, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1145/1873117.1873120", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Oct 29 17:56:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Guting:2010:ENN, author = "Ralf Hartmut G{\"u}ting and Thomas Behr and Jianqiu Xu", title = "Efficient $k$-nearest neighbor search on moving object trajectories", journal = j-VLDB-J, volume = "19", number = "5", pages = "687--714", month = oct, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1145/1873117.1873123", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Oct 29 17:56:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2010:TQT, author = "Feifei Li and Ke 
Yi and Wangchao Le", title = "Top-$k$ queries on temporal data", journal = j-VLDB-J, volume = "19", number = "5", pages = "715--733", month = oct, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1145/1873117.1873122", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Oct 29 17:56:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Duda:2010:PBI, author = "Cristian Duda and Donald Kossmann and Chong Zhou", title = "Predicate-based indexing for desktop search", journal = j-VLDB-J, volume = "19", number = "5", pages = "735--758", month = oct, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1145/1873117.1873124", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Oct 29 17:56:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bohm:2010:F, author = "Klemens B{\"o}hm and Laks V. 
Lakshmanan", title = "Foreword", journal = j-VLDB-J, volume = "19", number = "6", pages = "759--760", month = dec, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0201-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:41 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Carmel:2010:SBW, author = "David Carmel and Haggai Roitman and Elad Yom-Tov", title = "Social bookmark weighting for search and recommendation", journal = j-VLDB-J, volume = "19", number = "6", pages = "761--775", month = dec, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0211-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:41 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Social bookmarking enables knowledge sharing and efficient discovery on the web, where users can collaborate together by tagging documents of interests. A lot of attention was given lately for utilizing social bookmarking data to enhance traditional IR tasks. Yet, much less attention was given to the problem of estimating the effectiveness of an individual bookmark for the specific tasks. In this work, we propose a novel framework for social bookmark weighting which allows us to estimate the effectiveness of each of the bookmarks individually for several IR tasks. 
We show that by weighting bookmarks according to their estimated quality, we can significantly improve social search effectiveness.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Squicciarini:2010:PPS, author = "Anna C. Squicciarini and Mohamed Shehab and Joshua Wede", title = "Privacy policies for shared content in social network sites", journal = j-VLDB-J, volume = "19", number = "6", pages = "777--796", month = dec, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0193-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:41 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Social networking is one of the major technological phenomena of the Web 2.0, with hundreds of millions of subscribed users. Social networks enable a form of self-expression for users and help them to socialize and share content with other users. In spite of the fact that content sharing represents one of the prominent features of existing Social network sites, they do not provide any mechanisms for collective management of privacy settings for shared content. In this paper, using game theory, we model the problem of collective enforcement of privacy policies on shared data. 
In particular, we propose a solution that offers automated ways to share images based on an extended notion of content ownership.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hay:2010:RSR, author = "Michael Hay and Gerome Miklau and David Jensen and Don Towsley and Chao Li", title = "Resisting structural re-identification in anonymized social networks", journal = j-VLDB-J, volume = "19", number = "6", pages = "797--823", month = dec, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0210-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:41 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We identify privacy risks associated with releasing network datasets and provide an algorithm that mitigates those risks. A network dataset is a graph representing entities connected by edges representing relations such as friendship, communication or shared activity. Maintaining privacy when publishing a network dataset is uniquely challenging because an individual's network context can be used to identify them even if other identifying information is removed. In this paper, we introduce a parameterized model of structural knowledge available to the adversary and quantify the success of attacks on individuals in anonymized networks. 
We show that the risks of these attacks vary based on network structure and size and provide theoretical results that explain the anonymity risk in random networks.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gruhl:2010:MSI, author = "Daniel Gruhl and Meena Nagarajan and Jan Pieper and Christine Robson and Amit Sheth", title = "Multimodal social intelligence in a real-time dashboard system", journal = j-VLDB-J, volume = "19", number = "6", pages = "825--848", month = dec, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0207-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:41 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Social Networks provide one of the most rapidly evolving data sets in existence today. Traditional Business Intelligence applications struggle to take advantage of such data sets in a timely manner. The BBC SoundIndex, developed by the authors and others, enabled real-time analytics of music popularity using data from a variety of Social Networks. We present this system as a grounding example of how to overcome the challenges of working with this data from social networks. 
We discuss a variety of technologies to implement near real-time data analytics to transform Social Intelligence into Business Intelligence and evaluate their effectiveness in the music domain.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Benz:2010:SBP, author = "Dominik Benz and Andreas Hotho and Robert J{\"a}schke and Beate Krause and Folke Mitzlaff and Christoph Schmitz and Gerd Stumme", title = "The social bookmark and publication management system {BibSonomy}", journal = j-VLDB-J, volume = "19", number = "6", pages = "849--875", month = dec, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0208-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:41 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Social resource sharing systems are central elements of the Web 2.0 and use the same kind of lightweight knowledge representation, called folksonomy. Their large user communities and ever-growing networks of user-generated content have made them an attractive object of investigation for researchers from different disciplines like Social Network Analysis, Data Mining, Information Retrieval or Knowledge Discovery. In this paper, we summarize and extend our work on different aspects of this branch of Web 2.0 research, demonstrated and evaluated within our own social bookmark and publication sharing system BibSonomy, which is currently among the three most popular systems of its kind.
We structure this presentation along the different interaction phases of a user with our system, coupling the relevant research questions of each phase with the corresponding implementation issues.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Roy:2010:SEG, author = "Senjuti Basu Roy and Sihem Amer-Yahia and Ashish Chawla and Gautam Das and Cong Yu", title = "Space efficiency in group recommendation", journal = j-VLDB-J, volume = "19", number = "6", pages = "877--900", month = dec, year = "2010", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0209-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:41 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Imagine a system that gives you satisfying recommendations when you want to rent a movie with friends or find a restaurant to celebrate a colleague's farewell: at the core of such a system is what we call group recommendation. While computing individual recommendations have received lots of attention (e.g., Netflix prize), group recommendation has been confined to studying users' satisfaction with different aggregation strategies. In this paper (Some results are published in an earlier conference paper (Amer-Yahia et al. in VLDB, 2009). See Sect. 
``Paper contributions and outline'' for details.), we describe the challenges and desiderata of group recommendation and formalize different group consensus semantics that account for both an item's predicted ratings to the group members and the disagreements among them.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2011:PBK, author = "Guoliang Li and Jianhua Feng and Xiaofang Zhou and Jianyong Wang", title = "Providing built-in keyword search capabilities in {RDBMS}", journal = j-VLDB-J, volume = "20", number = "1", pages = "1--19", month = feb, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0188-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:36 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A common approach to performing keyword search over relational databases is to find the minimum Steiner trees in database graphs transformed from relational data. These methods, however, are rather expensive as the minimum Steiner tree problem is known to be NP-hard. Further, these methods are independent of the underlying relational database management system (RDBMS), thus cannot benefit from the capabilities of the RDBMS. As an alternative, in this paper we propose a new concept called Compact Steiner Tree (CSTree), which can be used to approximate the Steiner tree problem for answering top-$k$ keyword queries efficiently. 
We propose a novel structure-aware index, together with an effective ranking mechanism for fast, progressive and accurate retrieval of top-$k$ highest ranked CSTrees.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Cai:2011:SKD, author = "Deng Cai and Xiaofei He and Jiawei Han", title = "Speed up kernel discriminant analysis", journal = j-VLDB-J, volume = "20", number = "1", pages = "21--33", month = feb, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0189-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:36 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Linear discriminant analysis (LDA) has been a popular method for dimensionality reduction, which preserves class separability. The projection vectors are commonly obtained by maximizing the between-class covariance and simultaneously minimizing the within-class covariance. LDA can be performed either in the original input space or in the reproducing kernel Hilbert space (RKHS) into which data points are mapped, which leads to kernel discriminant analysis (KDA). When the data are highly nonlinear distributed, KDA can achieve better performance than LDA. 
However, computing the projective functions in KDA involves eigen-decomposition of kernel matrix, which is very expensive when a large number of training samples exist.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Qin:2011:SKS, author = "Lu Qin and Jeffrey Xu Yu and Lijun Chang", title = "Scalable keyword search on large data streams", journal = j-VLDB-J, volume = "20", number = "1", pages = "35--57", month = feb, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0190-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:36 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "It is widely recognized that the integration of information retrieval (IR) and database (DB) techniques provides users with a broad range of high quality services. Along this direction, IR-styled $m$-keyword query processing over a relational database in an RDBMS framework has been well studied. It finds all hidden interconnected tuple structures, for example connected trees that contain keywords and are interconnected by sequences of primary/foreign key relationships among tuples. A new challenging issue is how to monitor events that are implicitly interrelated over an open-ended relational data stream for a user-given $m$-keyword query.
Such a relational data stream is a sequence of tuple insertion/deletion operations.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Cao:2011:SSA, author = "Jianneng Cao and Panagiotis Karras and Panos Kalnis and Kian-Lee Tan", title = "{SABRE}: a {Sensitive Attribute Bucketization and REdistribution} framework for $t$-closeness", journal = j-VLDB-J, volume = "20", number = "1", pages = "59--81", month = feb, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0191-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:36 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Today, the publication of microdata poses a privacy threat: anonymous personal records can be re-identified using third data sources. Past research has tried to develop a concept of privacy guarantee that an anonymized data set should satisfy before publication, culminating in the notion of $t$-closeness. To satisfy $t$-closeness, the records in a data set need to be grouped into Equivalence Classes (ECs), such that each EC contains records of indistinguishable quasi-identifier values, and its local distribution of sensitive attribute (SA) values conforms to the global table distribution of SA values. 
However, despite this progress, previous research has not offered an anonymization algorithm tailored for $t$-closeness.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Terrovitis:2011:LGR, author = "Manolis Terrovitis and Nikos Mamoulis and Panos Kalnis", title = "Local and global recoding methods for anonymizing set-valued data", journal = j-VLDB-J, volume = "20", number = "1", pages = "83--106", month = feb, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0192-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:36 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we study the problem of protecting privacy in the publication of set-valued data. Consider a collection of supermarket transactions that contains detailed information about items bought together by individuals. Even after removing all personal characteristics of the buyer, which can serve as links to his identity, the publication of such data is still subject to privacy attacks from adversaries who have partial knowledge about the set. 
Unlike most previous works, we do not distinguish data as sensitive and non-sensitive, but we consider them both as potential quasi-identifiers and potential sensitive data, depending on the knowledge of the adversary.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lian:2011:PIR, author = "Xiang Lian and Lei Chen", title = "Probabilistic inverse ranking queries in uncertain databases", journal = j-VLDB-J, volume = "20", number = "1", pages = "107--127", month = feb, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0195-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:36 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Query processing in the uncertain database has become increasingly important due to the wide existence of uncertain data in many real applications. Different from handling precise data, the uncertain query processing needs to consider the data uncertainty and answer queries with confidence guarantees. In this paper, we formulate and tackle an important query, namely probabilistic inverse ranking (PIR) query, which retrieves possible ranks of a given query object in an uncertain database with confidence above a probability threshold. We present effective pruning methods to reduce the PIR search space, which can be seamlessly integrated into an efficient query procedure. 
Moreover, we tackle the problem of PIR query processing in high dimensional spaces, which reduces high dimensional uncertain data to a lower dimensional space.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hua:2011:RQU, author = "Ming Hua and Jian Pei and Xuemin Lin", title = "Ranking queries on uncertain data", journal = j-VLDB-J, volume = "20", number = "1", pages = "129--153", month = feb, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0196-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Feb 7 10:43:36 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Uncertain data is inherent in a few important applications. It is far from trivial to extend ranking queries (also known as top-$k$ queries), a popular type of queries on certain data, to uncertain data. In this paper, we cast ranking queries on uncertain data using three parameters: rank threshold $k$, probability threshold $p$, and answer set size threshold $l$. Systematically, we identify four types of ranking queries on uncertain data. First, a probability threshold top-$k$ query computes the uncertain records taking a probability of at least $p$ to be in the top-$k$ list.
Second, a top-$(k, l)$ query returns the top-$l$ uncertain records whose probabilities of being ranked among top-$k$ are the largest.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Abiteboul:2011:SIB, author = "Serge Abiteboul and Volker Markl and Tova Milo and Jignesh Patel", title = "Special issue: best papers of {VLDB} 2009", journal = j-VLDB-J, volume = "20", number = "2", pages = "155--156", month = apr, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0222-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 13 17:51:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mindolin:2011:PEP, author = "Denis Mindolin and Jan Chomicki", title = "Preference elicitation in prioritized skyline queries", journal = j-VLDB-J, volume = "20", number = "2", pages = "157--182", month = apr, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0227-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 13 17:51:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Preference queries incorporate the notion of binary preference relation into relational database querying. Instead of returning all the answers, such queries return only the best answers, according to a given preference relation. Preference queries are a fast growing area of database research. Skyline queries constitute one of the most thoroughly studied classes of preference queries.
A well-known limitation of skyline queries is that skyline preference relations assign the same importance to all attributes. In this work, we study p-skyline queries that generalize skyline queries by allowing varying attribute importance in preference relations. We perform an in-depth study of the properties of p-skyline preference relations.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Denev:2011:SFD, author = "Dimitar Denev and Arturas Mazeika and Marc Spaniol and Gerhard Weikum", title = "The {SHARC} framework for data quality in {Web} archiving", journal = j-VLDB-J, volume = "20", number = "2", pages = "183--207", month = apr, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0219-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 13 17:51:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Web archives preserve the history of born-digital content and offer great potential for sociologists, business analysts, and legal experts on intellectual property and compliance issues. Data quality is crucial for these purposes. Ideally, crawlers should gather coherent captures of entire Web sites, but the politeness etiquette and completeness requirement mandate very slow, long-duration crawling while Web sites undergo changes. This paper presents the SHARC framework for assessing the data quality in Web archives and for tuning capturing strategies toward better quality with given resources. 
We define data quality measures, characterize their properties, and develop a suite of quality-conscious scheduling strategies for archive crawling.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Elmeleegy:2011:HRT, author = "Hazem Elmeleegy and Jayant Madhavan and Alon Halevy", title = "Harvesting relational tables from lists on the {Web}", journal = j-VLDB-J, volume = "20", number = "2", pages = "209--226", month = apr, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0223-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 13 17:51:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A large number of web pages contain data structured in the form of ``lists''. Many such lists can be further split into multi-column tables, which can then be used in more semantically meaningful tasks. However, harvesting relational tables from such lists can be a challenging task. The lists are manually generated and hence need not have well-defined templates---they have inconsistent delimiters (if any) and often have missing information. We propose a novel technique for extracting tables from lists. The technique is domain independent and operates in a fully unsupervised manner.
We first use multiple sources of information to split individual lines into multiple fields and then, compare the splits across multiple lines to identify and fix incorrect splits and bad alignments.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Candea:2011:PPH, author = "George Candea and Neoklis Polyzotis and Radek Vingralek", title = "Predictable performance and high query concurrency for data analytics", journal = j-VLDB-J, volume = "20", number = "2", pages = "227--248", month = apr, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0221-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 13 17:51:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Conventional data warehouses employ the query-at-a-time model, which maps each query to a distinct physical plan. When several queries execute concurrently, this model introduces contention and thrashing, because the physical plans---unaware of each other---compete for access to the underlying I/O and computation resources. As a result, while modern systems can efficiently optimize and evaluate a single complex data analysis query, their performance suffers significantly and can be highly erratic when multiple complex queries run at the same time.
We present in this paper Cjoin, a new design that substantially improves throughput in large-scale data analytics systems processing many concurrent join queries.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2011:UAR, author = "Jian Li and Barna Saha and Amol Deshpande", title = "A unified approach to ranking in probabilistic databases", journal = j-VLDB-J, volume = "20", number = "2", pages = "249--275", month = apr, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0220-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 13 17:51:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Ranking is a fundamental operation in data analysis and decision support and plays an even more crucial role if the dataset being explored exhibits uncertainty. This has led to much work in understanding how to rank the tuples in a probabilistic dataset in recent years. In this article, we present a unified approach to ranking and top-$k$ query processing in probabilistic databases by viewing it as a multi-criterion optimization problem and by deriving a set of features that capture the key properties of a probabilistic dataset that dictate the ranked result.
We contend that a single, specific ranking function may not suffice for probabilistic databases, and we instead propose two parameterized ranking functions, called PRF \ldots{}", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gottlob:2011:NOS, author = "Georg Gottlob and Reinhard Pichler and Vadim Savenkov", title = "Normalization and optimization of schema mappings", journal = j-VLDB-J, volume = "20", number = "2", pages = "277--302", month = apr, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0226-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 13 17:51:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Schema mappings are high-level specifications that describe the relationship between database schemas. They are an important tool in several areas of database research, notably in data integration and data exchange. However, a concrete theory of schema mapping optimization including the formulation of optimality criteria and the construction of algorithms for computing optimal schema mappings is completely lacking to date. The goal of this work is to fill this gap. We start by presenting a system of rewrite rules to minimize sets of source-to-target tuple-generating dependencies. Moreover, we show that the result of this minimization is unique up to variable renaming. Hence, our optimization also yields a schema mapping normalization.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Cho:2011:LRM, author = "Chung-Wen Cho and Yi-Hung Wu and Show-Jane Yen and Ying Zheng and Arbee L. 
Chen", title = "On-line rule matching for event prediction", journal = j-VLDB-J, volume = "20", number = "3", pages = "303--334", month = jun, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0197-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 14 11:27:46 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The prediction of future events has great importance in many applications. The prediction is based on episode rules which are composed of events and two time constraints which require all the events in the episode rule and in the predicate of the rule to occur in a time interval, respectively. In an event stream, a sequence of events which matches the predicate of the rule satisfying the specified time constraint is called an occurrence of the predicate. After finding the occurrence, the consequent event which will occur in a time interval can be predicted. 
However, the time intervals computed from some occurrences for predicting the event can be contained in the time intervals computed from other occurrence and become redundant.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Liu:2011:MLD, author = "Jun Liu and Lu Jiang and Zhaohui Wu and Qinghua Zheng and Yanan Qian", title = "Mining learning-dependency between knowledge units from text", journal = j-VLDB-J, volume = "20", number = "3", pages = "335--345", month = jun, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0198-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 14 11:27:46 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Identifying learning-dependency among the knowledge units (KU) is a preliminary requirement of navigation learning. Methods based on link mining lack the ability of discovering such dependencies among knowledge units that are arranged in a linear way in the text. In this paper, we propose a method of mining the learning-dependencies among the KU from text document. This method is based on two features that we found and studied from the KU and the learning-dependencies among them. They are the distributional asymmetry of the domain terms and the local nature of the learning-dependency, respectively.
Our method consists of three stages, (1) Build document association relationship by calculating the distributional asymmetry of the domain terms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2011:LBM, author = "Rui Wang and Betty Salzberg and David Lomet", title = "Log-based middleware server recovery with transaction support", journal = j-VLDB-J, volume = "20", number = "3", pages = "347--370", month = jun, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0199-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 14 11:27:46 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Providing enterprises with reliable and available Web-based application programs is a challenge. Applications are traditionally spread over multiple nodes, from user (client), to middle tier servers, to back end transaction systems, e.g. databases. It has proven very difficult to ensure that these applications persist across system crashes so that ``exactly once'' execution is produced, always important and sometimes essential, e.g., in the financial area. Our system provides a framework for exactly once execution of multi-tier Web applications, built on a commercially available Web infrastructure. 
Its capabilities include low logging overhead, recovery isolation (independence), and consistency between mid-tier and transactional back end.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gao:2011:CVN, author = "Yunjun Gao and Baihua Zheng and Gencai Chen and Qing Li and Xiaofa Guo", title = "Continuous visible nearest neighbor query processing in spatial databases", journal = j-VLDB-J, volume = "20", number = "3", pages = "371--396", month = jun, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0200-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 14 11:27:46 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we identify and solve a new type of spatial queries, called continuous visible nearest neighbor (CVNN) search. 
Given a data set $P$, an obstacle set $O$, and a query line segment $q$ in a two-dimensional space, a CVNN query returns a set of $\langle p, R \rangle$ tuples such that \ldots{}", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Luo:2011:QRI, author = "Bo Luo and Dongwon Lee and Wang-Chien Lee and Peng Liu", title = "{QFilter}: rewriting insecure {XML} queries to secure ones using non-deterministic finite automata", journal = j-VLDB-J, volume = "20", number = "3", pages = "397--415", month = jun, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0202-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 14 11:27:46 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we ask whether XML access control can be supported when underlying (XML or relational) storage system does not provide adequate security features and propose three alternative solutions---primitive, pre-processing, and post-processing. Toward that scenario, in particular, we advocate a scalable and effective pre-processing approach, called QFilter.
QFilter is based on non-deterministic finite automata (NFA) and rewrites user's queries such that parts violating access control rules are pre-pruned.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Motahari-Nezhad:2011:ECP, author = "Hamid Reza Motahari-Nezhad and Regis Saint-Paul and Fabio Casati and Boualem Benatallah", title = "Event correlation for process discovery from web service interaction logs", journal = j-VLDB-J, volume = "20", number = "3", pages = "417--444", month = jun, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0203-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 14 11:27:46 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Understanding, analyzing, and ultimately improving business processes is a goal of enterprises today. These tasks are challenging as business processes in modern enterprises are implemented over several applications and Web services, and the information about process execution is scattered across several data sources. Understanding modern business processes entails identifying the correlation between events in data sources in the context of business processes (event correlation is the process of finding relationships between events that belong to the same process execution instance). 
In this paper, we investigate the problem of event correlation for business processes that are realized through the interactions of a set of Web services.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chakrabarti:2011:IDQ, author = "Soumen Chakrabarti and Amit Pathak and Manish Gupta", title = "Index design and query processing for graph conductance search", journal = j-VLDB-J, volume = "20", number = "3", pages = "445--470", month = jun, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0204-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 14 11:27:46 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Graph conductance queries, also known as personalized PageRank and related to random walks with restarts, were originally proposed to assign a hyperlink-based prestige score to Web pages. More general forms of such queries are also very useful for ranking in entity-relation (ER) graphs used to represent relational, XML and hypertext data. Evaluation of PageRank usually involves a global eigen computation. If the graph is even moderately large, interactive response times may not be possible. Recently, the need for interactive PageRank evaluation has increased. The graph may be fully known only when the query is submitted. 
Browsing actions of the user may change some inputs to the PageRank computation dynamically.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chen:2011:PAD, author = "Shaoping Chen and Yi-Cheng Tu and Yuni Xia", title = "Performance analysis of a dual-tree algorithm for computing spatial distance histograms", journal = j-VLDB-J, volume = "20", number = "4", pages = "471--494", month = aug, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0205-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Sep 16 19:01:00 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Many scientific and engineering fields produce large volume of spatiotemporal data. The storage, retrieval, and analysis of such data impose great challenges to database systems design. Analysis of scientific spatiotemporal data often involves computing functions of all point-to-point interactions. One such analytics, the Spatial Distance Histogram (SDH), is of vital importance to scientific discovery. Recently, algorithms for efficient SDH processing in large-scale scientific databases have been proposed. 
These algorithms adopt a recursive tree-traversing strategy to process point-to-point distances in the visited tree nodes in batches, thus require less time when compared to the brute-force approach where all pairwise distances have to be computed.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Fan:2011:DCR, author = "Wenfei Fan and Hong Gao and Xibei Jia and Jianzhong Li and Shuai Ma", title = "Dynamic constraints for record matching", journal = j-VLDB-J, volume = "20", number = "4", pages = "495--520", month = aug, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0206-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Sep 16 19:01:00 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper investigates constraints for matching records from unreliable data sources. (a) We introduce a class of matching dependencies (mds) for specifying the semantics of unreliable data. As opposed to static constraints for schema design, mds are developed for record matching, and are defined in terms of similarity predicates and a dynamic semantics. (b) We identify a special case of mds, referred to as relative candidate keys (rcks), to determine what attributes to compare and how to compare them when matching records across possibly different relations. 
(c) We propose a mechanism for inferring mds, a departure from traditional implication analysis, such that when we cannot match records by comparing attributes that contain errors, we may still find matches by using other, more reliable attributes.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Cheng:2011:FGQ, author = "James Cheng and Yiping Ke and Ada Wai-Chee Fu and Jeffrey Xu Yu", title = "Fast graph query processing with a low-cost index", journal = j-VLDB-J, volume = "20", number = "4", pages = "521--539", month = aug, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0212-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Sep 16 19:01:00 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper studies the problem of processing supergraph queries, that is, given a database containing a set of graphs, find all the graphs in the database of which the query graph is a supergraph. Existing works usually construct an index and performs a filtering-and-verification process, which still requires many subgraph isomorphism testings. There are also significant overheads in both index construction and maintenance. In this paper, we design a graph querying system that achieves both fast indexing and efficient query processing. The index is constructed by a simple but fast method of extracting the commonality among the graphs, which does not involve any costly operation such as graph mining.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mascetti:2011:PGS, author = "Sergio Mascetti and Dario Freni and Claudio Bettini and X. 
Sean Wang and Sushil Jajodia", title = "Privacy in geo-social networks: proximity notification with untrusted service providers and curious buddies", journal = j-VLDB-J, volume = "20", number = "4", pages = "541--566", month = aug, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0213-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Sep 16 19:01:00 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A major feature of the emerging geo-social networks is the ability to notify a user when any of his friends (also called buddies) happens to be geographically in proximity. This proximity service is usually offered by the network itself or by a third party service provider (SP) using location data acquired from the users. This paper provides a rigorous theoretical and experimental analysis of the existing solutions for the location privacy problem in proximity services. This is a serious problem for users who do not trust the SP to handle their location data and would only like to release their location information in a generalized form to participating buddies.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mohammed:2011:AMG, author = "Noman Mohammed and Benjamin C. 
Fung and Mourad Debbabi", title = "Anonymity meets game theory: secure data integration with malicious participants", journal = j-VLDB-J, volume = "20", number = "4", pages = "567--588", month = aug, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-010-0214-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Sep 16 19:01:00 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Data integration methods enable different data providers to flexibly integrate their expertise and deliver highly customizable services to their customers. Nonetheless, combining data from different sources could potentially reveal person-specific sensitive information. In VLDBJ 2006, Jiang and Clifton (Very Large Data Bases J (VLDBJ) 15(4):316--333, 2006) propose a secure Distributed k-Anonymity (DkA) framework for integrating two private data tables to a k-anonymous table in which each private table is a vertical partition on the same set of records. Their proposed DkA framework is not scalable to large data sets. 
Moreover, DkA is limited to a two-party scenario and the parties are assumed to be semi-honest.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ahmad:2011:IAS, author = "Mumtaz Ahmad and Ashraf Aboulnaga and Shivnath Babu and Kamesh Munagala", title = "Interaction-aware scheduling of report-generation workloads", journal = j-VLDB-J, volume = "20", number = "4", pages = "589--615", month = aug, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0217-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Sep 16 19:01:00 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The typical workload in a database system consists of a mix of multiple queries of different types that run concurrently. Interactions among the different queries in a query mix can have a significant impact on database performance. Hence, optimizing database performance requires reasoning about query mixes rather than considering queries individually. Current database systems lack the ability to do such reasoning. We propose a new approach based on planning experiments and statistical modeling to capture the impact of query interactions. 
Our approach requires no prior assumptions about the internal workings of the database system or the nature and cause of query interactions, making it portable across systems.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2011:EFF, author = "Guoliang Li and Shengyue Ji and Chen Li and Jianhua Feng", title = "Efficient fuzzy full-text type-ahead search", journal = j-VLDB-J, volume = "20", number = "4", pages = "617--640", month = aug, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0218-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Sep 16 19:01:00 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Traditional information systems return answers after a user submits a complete query. Users often feel ``left in the dark'' when they have limited knowledge about the underlying data and have to use a try-and-see approach for finding information. A recent trend of supporting autocomplete in these systems is a first step toward solving this problem. In this paper, we study a new information-access paradigm, called ``type-ahead search'' in which the system searches the underlying data ``on the fly'' as the user types in query keywords. 
It extends autocomplete interfaces by allowing keywords to appear at different places in the underlying data.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Guting:2011:SID, author = "Ralf Hartmut G{\"u}ting and Nikos Mamoulis", title = "Special issue on data management for mobile services", journal = j-VLDB-J, volume = "20", number = "5", pages = "641--642", month = oct, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0250-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:25 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Popa:2011:INT, author = "Iulian Sandu Popa and Karine Zeitouni and Vincent Oria and Dominique Barth and Sandrine Vial", title = "Indexing in-network trajectory flows", journal = j-VLDB-J, volume = "20", number = "5", pages = "643--669", month = oct, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0236-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:25 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lange:2011:ERT, author = "Ralph Lange and Frank D{\"u}rr and Kurt Rothermel", title = "Efficient real-time trajectory tracking", journal = j-VLDB-J, volume = "20", number = "5", pages = "671--694", month = oct, year = "2011", CODEN = "VLDBFR", DOI = 
"https://doi.org/10.1007/s00778-011-0237-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:25 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Giannotti:2011:UCH, author = "Fosca Giannotti and Mirco Nanni and Dino Pedreschi and Fabio Pinelli and Chiara Renso and Salvatore Rinzivillo and Roberto Trasarti", title = "Unveiling the complexity of human mobility by querying and mining massive trajectory data", journal = j-VLDB-J, volume = "20", number = "5", pages = "695--719", month = oct, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0244-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:25 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Timko:2011:SSA, author = "Igor Timko and Michael B{\"o}hlen and Johann Gamper", title = "Sequenced spatiotemporal aggregation for coarse query granularities", journal = j-VLDB-J, volume = "20", number = "5", pages = "721--741", month = oct, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0247-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:25 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } 
@Article{Guo:2011:DBS, author = "Xi Guo and Baihua Zheng and Yoshiharu Ishikawa and Yunjun Gao", title = "Direction-based surrounder queries for mobile recommendations", journal = j-VLDB-J, volume = "20", number = "5", pages = "743--766", month = oct, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0241-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:25 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Trajcevski:2011:RCN, author = "Goce Trajcevski and Roberto Tamassia and Isabel F. Cruz and Peter Scheuermann and David Hartglass and Christopher Zamierowski", title = "Ranking continuous nearest neighbors for uncertain trajectories", journal = j-VLDB-J, volume = "20", number = "5", pages = "767--791", month = oct, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0249-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:25 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Rao:2011:STE, author = "Weixiong Rao and Lei Chen and Ada Wai-Chee Fu", title = "{STAIRS}: {Towards} efficient full-text filtering and dissemination in {DHT} environments", journal = j-VLDB-J, volume = "20", number = "6", pages = "793--817", month = dec, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0224-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:26 MST 2011", bibsource = 
"http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Nowadays ``live'' content, such as weblog, wikipedia, and news, is ubiquitous in the Internet. Providing users with relevant content in a timely manner becomes a challenging problem. Differing from Web search technologies and RSS feeds/reader applications, this paper envisions a personalized full-text content filtering and dissemination system in a highly distributed environment such as a Distributed Hash Table (DHT) based Peer-to-Peer (P2P) Network. Users subscribe to their interested content by specifying input keywords and thresholds as filters. Then, content is disseminated to those users having interest in it. In the literature, full-text document publishing in DHTs has suffered for a long time from the high cost of forwarding a document to home nodes of all distinct terms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lian:2011:STS, author = "Xiang Lian and Lei Chen", title = "Shooting top-$k$ stars in uncertain databases", journal = j-VLDB-J, volume = "20", number = "6", pages = "819--840", month = dec, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0225-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:26 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Query processing in the uncertain database has played an important role in many real-world applications due to the wide existence of uncertain data. Although many previous techniques can correctly handle precise data, they are not directly applicable to the uncertain scenario. 
In this article, we investigate and propose a novel query, namely probabilistic top-k star (PTkS) query, which aims to retrieve k objects in an uncertain database that are ``closest'' to a static/dynamic query point, considering both distance and probability aspects. In order to efficiently answer PTkS queries with a static/moving query point, we propose effective pruning methods to reduce the PTkS search space, which can be seamlessly integrated into an efficient query procedure.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Perez-Sorrosal:2011:ESC, author = "Francisco Perez-Sorrosal and Marta Pati{\~n}o-Martinez and Ricardo Jimenez-Peris and Bettina Kemme", title = "Elastic {SI-Cache}: consistent and scalable caching in multi-tier architectures", journal = j-VLDB-J, volume = "20", number = "6", pages = "841--865", month = dec, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0228-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:26 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The new vision of cloud computing demands scalable, available and autonomic software platforms in order to deploy applications and services accessible anywhere and anytime. Multi-tier architectures are an important building block for many applications that are deployed in the cloud. This paper presents a novel caching and replication infrastructure that facilitates the scalable and elastic deployment of multi-tier architectures. Our Elastic SI-Cache is a novel multi-version cache that attains high performance and consistency in multi-tier systems. In contrast to most existing caches, Elastic SI-Cache provides snapshot isolation coherently across all tiers. 
Furthermore, Elastic SI-Cache supports scalable replication of the different tiers where replicas can be added or removed dynamically as needed, making the cache amenable for cloud computing environments.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Moga:2011:USC, author = "Alexandru Moga and Irina Botan and Nesime Tatbul", title = "{UpStream}: storage-centric load management for streaming applications with update semantics", journal = j-VLDB-J, volume = "20", number = "6", pages = "867--892", month = dec, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0229-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:26 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper addresses the problem of minimizing the staleness of query results for streaming applications with update semantics under overload conditions. Staleness is a measure of how out-of-date the results are compared with the latest data arriving on the input. Real-time streaming applications are subject to overload due to unpredictably increasing data rates, while in many of them, we observe that data streams and queries in fact exhibit ``update semantics'' (i.e., the latest input data are all that really matters when producing a query result). Under such semantics, overload will cause staleness to build up. The key to avoid this is to exploit the update semantics of applications as early as possible in the processing pipeline.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wong:2011:MBR, author = "Raymond Chi-Wing Wong and M. 
Tamer {\"O}zsu and Ada Wai-Chee Fu and Philip S. Yu and Lian Liu and Yubao Liu", title = "Maximizing bichromatic reverse nearest neighbor for {Lp-norm} in two- and three-dimensional spaces", journal = j-VLDB-J, volume = "20", number = "6", pages = "893--919", month = dec, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0230-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:26 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Bichromatic reverse nearest neighbor (BRNN) has been extensively studied in spatial database literature. In this paper, we study a related problem called MaxBRNN: find an optimal region that maximizes the size of BRNNs for $L_p$-norm in two- and three-dimensional spaces. Such a problem has many real-life applications, including the problem of finding a new server point that attracts as many customers as possible by proximity.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tiakas:2011:PPS, author = "Eleftherios Tiakas and Apostolos N. Papadopoulos and Yannis Manolopoulos", title = "Progressive processing of subspace dominating queries", journal = j-VLDB-J, volume = "20", number = "6", pages = "921--948", month = dec, year = "2011", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0231-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Dec 15 07:28:26 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A top-k dominating query reports the k items with the highest domination score. Algorithms for efficient processing of this query have been recently proposed in the literature. 
Those methods, either index based or index free, apply a series of pruning criteria toward efficient processing. However, they are characterized by several limitations, such as (1) they lack progressiveness (they report the k best items at the end of the processing), (2) they require a multi-dimensional index or they build a grid-based index on-the-fly, which suffers from performance degradation, especially in high dimensionalities, and (3) they do not support vertically decomposed data. In this paper, we design efficient algorithms that can handle any subset of the dimensions in a progressive manner.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mueller:2012:SNF, author = "Rene Mueller and Jens Teubner and Gustavo Alonso", title = "Sorting networks on {FPGAs}", journal = j-VLDB-J, volume = "21", number = "1", pages = "1--23", month = feb, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0232-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jan 31 06:48:57 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Computer architectures are quickly changing toward heterogeneous many-core systems. Such a trend opens up interesting opportunities but also raises immense challenges since the efficient use of heterogeneous many-core systems is not a trivial problem. Software-configurable microprocessors and FPGAs add further diversity but also increase complexity. In this paper, we explore the use of sorting networks on field-programmable gate arrays (FPGAs). FPGAs are very versatile in terms of how they can be used and can also be added as additional processing units in standard CPU sockets. 
Our results indicate that efficient usage of FPGAs involves non-trivial aspects such as having the right computation model (a sorting network in this case); a careful implementation that balances all the design constraints in an FPGA; and the proper integration strategy to link the FPGA to the rest of the system.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Georgoulas:2012:DSE, author = "Konstantinos Georgoulas and Yannis Kotidis", title = "Distributed similarity estimation using derived dimensions", journal = j-VLDB-J, volume = "21", number = "1", pages = "25--50", month = feb, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0233-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jan 31 06:48:57 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Computing the similarity between data objects is a fundamental operation for many distributed applications such as those on the World Wide Web, in Peer-to-Peer networks, or even in Sensor Networks. In our work, we provide a framework based on Random Hyperplane Projection (RHP) that permits continuous computation of similarity estimates (using the cosine similarity or the correlation coefficient as the preferred similarity metric) between data descriptions that are streamed from remote sites. These estimates are computed at a monitoring node, without the need for transmitting the actual data values. 
The original RHP framework is data agnostic and works for arbitrary data sets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Deutch:2012:TIT, author = "Daniel Deutch and Tova Milo", title = "Type inference and type checking for queries over execution traces", journal = j-VLDB-J, volume = "21", number = "1", pages = "51--68", month = feb, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0234-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jan 31 06:48:57 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We study here Type Inference and Type Checking for queries over the execution traces of Business Processes. We define formal models for such execution traces, allowing to capture various realistic scenarios of partial information about these traces. We then define corresponding notions of types, and the problems of type inference and type checking in this context. 
We further provide a comprehensive study of the decidability and complexity of these problems, in various cases, and suggest efficient algorithms where possible.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Cheema:2012:CRN, author = "Muhammad Aamir Cheema and Wenjie Zhang and Xuemin Lin and Ying Zhang and Xuefei Li", title = "Continuous reverse $k$ nearest neighbors queries in {Euclidean} space and in spatial networks", journal = j-VLDB-J, volume = "21", number = "1", pages = "69--95", month = feb, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0235-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jan 31 06:48:57 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we study the problem of continuous monitoring of reverse k nearest neighbors queries in Euclidean space as well as in spatial networks. Existing techniques are sensitive toward objects and queries movement. For example, the results of a query are to be recomputed whenever the query changes its location. We present a framework for continuous reverse k nearest neighbor (RkNN) queries by assigning each object and query with a safe region such that the expensive recomputation is not required as long as the query and objects remain in their respective safe regions. This significantly improves the computation cost. 
As a byproduct, our framework also reduces the communication cost in client--server architectures because an object does not report its location to the server unless it leaves its safe region or the server sends a location update request.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zou:2012:APM, author = "Lei Zou and Lei Chen and M. Tamer {\"O}zsu and Dongyan Zhao", title = "Answering pattern match queries in large graph databases via graph embedding", journal = j-VLDB-J, volume = "21", number = "1", pages = "97--120", month = feb, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0238-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jan 31 06:48:57 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The growing popularity of graph databases has generated interesting data management problems, such as subgraph search, shortest path query, reachability verification, and pattern matching. Among these, a pattern match query is more flexible compared with a subgraph search and more informative compared with a shortest path or a reachability query. In this paper, we address distance-based pattern match queries over a large data graph G. Due to the huge search space, we adopt a filter-and-refine framework to answer a pattern match query over a large graph. 
We first find a set of candidate matches by a graph embedding technique and then evaluate these to find the exact matches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hartmann:2012:DES, author = "Sven Hartmann and Markus Kirchberg and Sebastian Link", title = "Design by example for {SQL} table definitions with functional dependencies", journal = j-VLDB-J, volume = "21", number = "1", pages = "121--144", month = feb, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0239-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jan 31 06:48:57 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A database is C-Armstrong for a given set of constraints in a class C if it satisfies every constraint of the set and violates every constraint in C not implied by the set. Therefore, Armstrong databases are test data that perfectly illustrate the current perceptions about the semantics of a schema. We extend the existing theory of Armstrong relations to a toolbox of Armstrong tables. That is, we investigate structural and computational properties of Armstrong tables for the class of functional dependencies (FDs) over SQL tables. Relations are special instances of SQL tables with no duplicate rows and no null value occurrences.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Guravannavar:2012:WSO, author = "Ravindra Guravannavar and S. Sudarshan and Ajit A. Diwan and Ch. 
Sobhan Babu", title = "Which sort orders are interesting?", journal = j-VLDB-J, volume = "21", number = "1", pages = "145--165", month = feb, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0240-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jan 31 06:48:57 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Sort orders play an important role in query evaluation. Algorithms that rely on sorting are widely used to implement joins, grouping, duplicate elimination and other set operations. The notion of interesting orders has allowed query optimizers to consider plans that could be locally sub-optimal, but produce ordered output beneficial for other operators, and thus be part of a globally optimal plan. However, the number of interesting orders for most operators is factorial in the number of attributes involved. Optimizer implementations use heuristics to prune the number of interesting orders, but the quality of the heuristics is unclear. 
Increasingly complex decision support queries and increasing use of query-covering indices, which provide multiple alternative sort orders for relations, motivate us to better address the problem of choosing interesting orders.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Atzeni:2012:SIB, author = "Paolo Atzeni and Elisa Bertino and Masaru Kitsuregawa and Kian-Lee Tan", title = "Special issue: best papers of {VLDB 2010}", journal = j-VLDB-J, volume = "21", number = "2", pages = "167--168", month = apr, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0267-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 23 08:02:21 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bu:2012:HAL, author = "Yingyi Bu and Bill Howe and Magdalena Balazinska and Michael D. Ernst", title = "The {HaLoop} approach to large-scale iterative data analysis", journal = j-VLDB-J, volume = "21", number = "2", pages = "169--190", month = apr, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0269-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 23 08:02:21 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The growing demand for large-scale data mining and data analysis applications has led both industry and academia to design new types of highly scalable data-intensive computing platforms. MapReduce has enjoyed particular success. 
However, MapReduce lacks built-in support for iterative programs, which arise naturally in many applications including data mining, web ranking, graph analysis, and model fitting. This paper (This is an extended version of the VLDB 2010 paper ``HaLoop: Efficient Iterative Data Processing on Large Clusters'' PVLDB 3(1):285--296, 2010.) presents HaLoop, a modified version of the Hadoop MapReduce framework, that is designed to serve these applications. HaLoop allows iterative applications to be assembled from existing Hadoop programs without modification, and significantly improves their efficiency by providing inter-iteration caching mechanisms and a loop-aware scheduler to exploit these caches. HaLoop retains the fault-tolerance properties of MapReduce through automatic cache recovery and task re-execution. We evaluated HaLoop on a variety of real applications and real datasets. Compared with Hadoop, on average, HaLoop improved runtimes by a factor of 1.85 and shuffled only 4 \% as much data between mappers and reducers in the applications that we tested.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Alexe:2012:MCI, author = "Bogdan Alexe and Mauricio Hern{\'a}ndez and Lucian Popa and Wang-Chiew Tan", title = "{MapMerge}: correlating independent schema mappings", journal = j-VLDB-J, volume = "21", number = "2", pages = "191--211", month = apr, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0264-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 23 08:02:21 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "One of the main steps toward integration or exchange of data is to design the mappings that describe the (often complex) relationships between the source schemas or formats and the
desired target schema. In this paper, we introduce a new operator, called MapMerge, that can be used to correlate multiple, independently designed schema mappings of smaller scope into larger schema mappings. This allows a more modular construction of complex mappings from various types of smaller mappings such as schema correspondences produced by a schema matcher or pre-existing mappings that were designed by either a human user or via mapping tools. In particular, the new operator also enables a new ``divide-and-merge'' paradigm for mapping creation, where the design is divided (on purpose) into smaller components that are easier to create and understand and where MapMerge is used to automatically generate a meaningful overall mapping. We describe our MapMerge algorithm and demonstrate the feasibility of our implementation on several real and synthetic mapping scenarios. In our experiments, we make use of a novel similarity measure between two database instances with different schemas that quantifies the preservation of data associations. We show experimentally that MapMerge improves the quality of the schema mappings, by significantly increasing the similarity between the input source instance and the generated target instance. 
Finally, we provide a new algorithm that combines MapMerge with schema mapping composition to correlate flows of schema mappings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Fan:2012:TCF, author = "Wenfei Fan and Jianzhong Li and Shuai Ma and Nan Tang and Wenyuan Yu", title = "Towards certain fixes with editing rules and master data", journal = j-VLDB-J, volume = "21", number = "2", pages = "213--238", month = apr, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0253-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 23 08:02:21 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A variety of integrity constraints have been studied for data cleaning. While these constraints can detect the presence of errors, they fall short of guiding us to correct the errors. Indeed, data repairing based on these constraints may not find certain fixes that are guaranteed correct, and worse still, may even introduce new errors when attempting to repair the data. We propose a method for finding certain fixes, based on master data, a notion of certain regions, and a class of editing rules. A certain region is a set of attributes that are assured correct by the users. Given a certain region and master data, editing rules tell us what attributes to fix and how to update them. We show how the method can be used in data monitoring and enrichment. We also develop techniques for reasoning about editing rules, to decide whether they lead to a unique fix and whether they are able to fix all the attributes in a tuple, relative to master data and a certain region. 
Furthermore, we present a framework and an algorithm to find certain fixes, by interacting with the users to ensure that one of the certain regions is correct. We experimentally verify the effectiveness and scalability of the algorithm.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Johnson:2012:SWA, author = "Ryan Johnson and Ippokratis Pandis and Radu Stoica and Manos Athanassoulis and Anastasia Ailamaki", title = "Scalability of write-ahead logging on multicore and multisocket hardware", journal = j-VLDB-J, volume = "21", number = "2", pages = "239--263", month = apr, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0260-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 23 08:02:21 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The shift to multi-core and multi-socket hardware brings new challenges to database systems, as the software parallelism determines performance. Even though database systems traditionally accommodate simultaneous requests, a multitude of synchronization barriers serialize execution. Write-ahead logging is a fundamental, omnipresent component in ARIES-style concurrency and recovery, and one of the most important yet-to-be addressed potential bottlenecks, especially in OLTP workloads making frequent small changes to data. In this paper, we identify four logging-related impediments to database system scalability. 
Each issue challenges different level in the software architecture: (a) the high volume of small-sized I/O requests may saturate the disk, (b) transactions hold locks while waiting for the log flush, (c) extensive context switching overwhelms the OS scheduler with threads executing log I/Os, and (d) contention appears as transactions serialize accesses to in-memory log data structures. We demonstrate these problems and address them with techniques that, when combined, comprise a holistic, scalable approach to logging. Our solution achieves a 20--69\% speedup over a modern database system when running log-intensive workloads, such as the TPC-B and TATP benchmarks, in a single-socket multiprocessor server. Moreover, it achieves log insert throughput over 2.2 GB/s for small log records on the single-socket server, roughly 20 times higher than the traditional way of accessing the log using a single mutex. Furthermore, we investigate techniques on scaling the performance of logging to multi-socket servers.
We present a set of optimizations which partly ameliorate the latency penalty that comes with multi-socket hardware, and then we investigate the feasibility of applying a distributed log buffer design at the socket level.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chen:2012:AUP, author = "Su Chen and Beng Chin Ooi and Zhenjie Zhang", title = "An adaptive updating protocol for reducing moving object database workload", journal = j-VLDB-J, volume = "21", number = "2", pages = "265--286", month = apr, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0257-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Apr 23 08:02:21 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In the last decade, spatio-temporal database research focuses on the design of effective and efficient indexing structures in support of location-based queries such as predictive range queries and nearest neighbor queries. While a variety of indexing techniques have been proposed to accelerate the processing of updates and queries, not much attention has been paid to the updating protocol, which is another important factor affecting the system performance. In this paper, we propose a generic and adaptive updating protocol for moving object databases with less number of updates between objects and the database server, thereby reducing the overall workload of the system. In contrast to the approach adopted by most conventional moving object database systems where the exact locations and velocities last disclosed are used to predict their motions, we propose the concept of Spatio-temporal safe region to approximate possible future locations. 
Spatio-temporal safe regions provide larger space of tolerance for moving objects, freeing them from location and velocity updates as long as the errors remain predictable in the database. To answer predictive queries accurately, the server is allowed to probe the latest status of objects when their safe regions are inadequate in returning the exact query results. Spatio-temporal safe regions are calculated and optimized by the database server with two contradictory objectives: reducing update workload while guaranteeing query accuracy and efficiency. To achieve this, we propose a cost model that estimates the composition of active and passive updates based on historical motion records and query distribution. More system performance improvements can be obtained by cutting more updates from the clients, when the users of system are comfortable with incomplete but accuracy bounded query results. We have conducted extensive experiments to evaluate our proposal on a variety of popular indexing structures. The results confirm the viability, robustness, accuracy and efficiency of our proposed protocol.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Fusco:2012:RTC, author = "Francesco Fusco and Michail Vlachos and Marc Ph. 
Stoecklin", title = "Real-time creation of bitmap indexes on streaming network data", journal = j-VLDB-J, volume = "21", number = "3", pages = "287--307", month = jun, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0242-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 26 17:39:07 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "High-speed archival and indexing solutions of streaming traffic are growing in importance for applications such as monitoring, forensic analysis, and auditing. Many large institutions require fast solutions to support expedient analysis of historical network data, particularly in case of security breaches. However, ``turning back the clock'' is not a trivial task. The first major challenge is that such a technology needs to support data archiving under extremely high-speed insertion rates. Moreover, the archives created have to be stored in a compressed format that is still amenable to indexing and search. The above requirements make general-purpose databases unsuitable for this task and dedicated solutions are required. This work describes a solution for high-speed archival storage, indexing, and data querying on network flow information. We make the two following important contributions: (a) we propose a novel compressed bitmap index approach that significantly reduces both CPU load and disk consumption and, (b) we introduce an online stream reordering mechanism that further reduces space requirements and improves the time for data retrieval. The reordering methodology is based on the principles of locality-sensitive hashing (LSH) and also of interest for other bitmap creation techniques. Because of the synergy of these two components, our solution can sustain data insertion rates that reach 500,000--1 million records per second. 
To put these numbers into perspective, typical commercial network flow solutions can currently process 20,000--60,000 flows per second. In addition, our system offers interactive query response times that enable administrators to perform complex analysis tasks on the fly. Our technique is directly amenable to parallel execution, allowing its application in domains that are challenged by large volumes of historical measurement data, such as network auditing, traffic behavior analysis, and large-scale data visualization in service provider networks.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gordevicus:2012:PTA, author = "Juozas Gordevi{\v{c}}ius and Johann Gamper and Michael B{\"o}hlen", title = "Parsimonious temporal aggregation", journal = j-VLDB-J, volume = "21", number = "3", pages = "309--332", month = jun, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0243-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 26 17:39:07 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Temporal aggregation is an important operation in temporal databases, and different variants thereof have been proposed. In this paper, we introduce a novel temporal aggregation operator, termed parsimonious temporal aggregation (PTA), that overcomes major limitations of existing approaches. PTA takes the result of instant temporal aggregation (ITA) of size n, which might be up to twice as large as the argument relation, and merges similar tuples until a given error ($ \epsilon $) or size ($c$) bound is reached. The new operator is data-adaptive and allows the user to control the trade-off between the result size and the error introduced by merging.
For the precise evaluation of PTA queries, we propose two dynamic programming--based algorithms for size- and error-bounded queries, respectively, with a worst-case complexity that is quadratic in n. We present two optimizations that take advantage of temporal gaps and different aggregation groups and achieve a linear runtime in experiments with real-world data. For the quick computation of an approximate PTA answer, we propose an efficient greedy merging strategy with a precision that is upper bounded by $ O(\log n) $. We present two algorithms that implement this strategy and begin to merge as ITA tuples are produced. They require $ O(n \log (c + \beta)) $ time and $ O(c + \beta) $ space, where $ \beta $ is the size of a read-ahead buffer and is typically very small. An empirical evaluation on real-world and synthetic data shows that PTA considerably reduces the size of the aggregation result, yet introducing only small errors. The greedy algorithms are scalable for large data sets and introduce less error than other approximation techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hore:2012:SMR, author = "Bijit Hore and Sharad Mehrotra and Mustafa Canim and Murat Kantarcioglu", title = "Secure multidimensional range queries over outsourced data", journal = j-VLDB-J, volume = "21", number = "3", pages = "333--358", month = jun, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0245-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 26 17:39:07 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we study the problem of supporting multidimensional range queries on encrypted data.
The problem is motivated by secure data outsourcing applications where a client may store his/her data on a remote server in encrypted form and want to execute queries using server's computational capabilities. The solution approach is to compute a secure indexing tag of the data by applying bucketization (a generic form of data partitioning) which prevents the server from learning exact values but still allows it to check if a record satisfies the query predicate. Queries are evaluated in an approximate manner where the returned set of records may contain some false positives. These records then need to be weeded out by the client which comprises the computational overhead of our scheme. We develop a bucketization procedure for answering multidimensional range queries on multidimensional data. For a given bucketization scheme, we derive cost and disclosure-risk metrics that estimate client's computational overhead and disclosure risk respectively. Given a multidimensional dataset, its bucketization is posed as an optimization problem where the goal is to minimize the risk of disclosure while keeping query cost (client's computational overhead) below a certain user-specified threshold value. We provide a tunable data bucketization algorithm that allows the data owner to control the trade-off between disclosure risk and cost. 
We also study the trade-off characteristics through an extensive set of experiments on real and synthetic data.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hose:2012:SSP, author = "Katja Hose and Akrivi Vlachou", title = "A survey of skyline processing in highly distributed environments", journal = j-VLDB-J, volume = "21", number = "3", pages = "359--384", month = jun, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0246-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 26 17:39:07 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "During the last decades, data management and storage have become increasingly distributed. Advanced query operators, such as skyline queries, are necessary in order to help users to handle the huge amount of available data by identifying a set of interesting data objects. Skyline query processing in highly distributed environments poses inherent challenges and demands and requires non-traditional techniques due to the distribution of content and the lack of global knowledge. This paper surveys this interesting and still evolving research area, so that readers can easily obtain an overview of the state-of-the-art. We outline the objectives and the main principles that any distributed skyline approach has to fulfill, leading to useful guidelines for developing algorithms for distributed skyline processing. We review in detail existing approaches that are applicable for highly distributed environments, clarify the assumptions of each approach, and provide a comparative performance analysis. Moreover, we study the skyline variants each approach supports. Our analysis leads to a taxonomy of existing approaches. 
Finally, we present interesting research topics on distributed skyline computation that have not yet been explored.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gong:2012:EMU, author = "Jian Gong and Reynold Cheng and David W. Cheung", title = "Efficient management of uncertainty in {XML} schema matching", journal = j-VLDB-J, volume = "21", number = "3", pages = "385--409", month = jun, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0248-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 26 17:39:07 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Despite advances in machine learning technologies a schema matching result between two database schemas (e.g., those derived from COMA++) is likely to be imprecise. In particular, numerous instances of ``possible mappings'' between the schemas may be derived from the matching result. In this paper, we study problems related to managing possible mappings between two heterogeneous XML schemas. First, we study how to efficiently generate possible mappings for a given schema matching task. While this problem can be solved by existing algorithms, we show how to improve the performance of the solution by using a divide-and-conquer approach. Second, storing and querying a large set of possible mappings can incur large storage and evaluation overhead. For XML schemas, we observe that their possible mappings often exhibit a high degree of overlap. We hence propose a novel data structure, called the block tree, to capture the commonalities among possible mappings. The block tree is useful for representing the possible mappings in a compact manner and can be efficiently generated. 
Moreover, it facilitates the evaluation of a probabilistic twig query (PTQ), which returns the non-zero probability that a fragment of an XML document matches a given query. For users who are interested only in answers with $k$-highest probabilities, we also propose the top-$k$ PTQ and present an efficient solution for it. An extensive evaluation on real-world data sets shows that our approaches significantly improve the efficiency of generating, storing, and querying possible mappings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Cao:2012:SSA, author = "Yu Cao and Ramadhana Bramandia and Chee-Yong Chan and Kian-Lee Tan", title = "Sort-sharing-aware query processing", journal = j-VLDB-J, volume = "21", number = "3", pages = "411--436", month = jun, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0251-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Jun 26 17:39:07 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Many database applications require sorting a table (or relation) over multiple sort orders. Some examples include creation of multiple indices on a relation, generation of multiple reports from a table, evaluation of a complex query that involves multiple instances of a relation, and batch processing of a set of queries. In this paper, we study how to optimize multiple sortings of a table. We investigate the correlation between sort orders and exploit sort-sharing techniques of reusing the (partial) work done to sort a table on a particular order for another order. Specifically, we introduce a novel and powerful evaluation technique, called cooperative sorting, that enables sort sharing between seemingly non-related sort orders.
Subsequently, given a specific set of sort orders, we determine the best combination of various sort-sharing techniques so as to minimize the total processing cost. We also develop techniques to make a traditional query optimizer extensible so that it will not miss the truly cheapest execution plan with the sort-sharing (post-) optimization turned on. We demonstrate the efficiency of our ideas with a prototype implementation in PostgreSQL and evaluate the performance using both TPC-DS benchmark and synthetic data. Our experimental results show significant performance improvement over the traditional evaluation scheme.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Feng:2012:TJT, author = "Jianhua Feng and Jiannan Wang and Guoliang Li", title = "Trie-join: a trie-based method for efficient string similarity joins", journal = j-VLDB-J, volume = "21", number = "4", pages = "437--461", month = aug, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0252-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 20 14:56:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A string similarity join finds similar pairs between two collections of strings. Many applications, e.g., data integration and cleaning, can significantly benefit from an efficient string-similarity-join algorithm. In this paper, we study string similarity joins with edit-distance constraints. Existing methods usually employ a filter-and-refine framework and suffer from the following limitations: (1) They are inefficient for the data sets with short strings (the average string length is not larger than 30); (2) They involve large indexes; (3) They are expensive to support dynamic update of data sets. 
To address these problems, we propose a novel method called trie-join, which can generate results efficiently with small indexes. We use a trie structure to index the strings and utilize the trie structure to efficiently find similar string pairs based on subtrie pruning. We devise efficient trie-join algorithms and pruning techniques to achieve high performance. Our method can be easily extended to support dynamic update of data sets efficiently. We conducted extensive experiments on four real data sets. Experimental results show that our algorithms outperform state-of-the-art methods by an order of magnitude on the data sets with short strings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Augsten:2012:WGA, author = "Nikolaus Augsten and Michael B{\"o}hlen and Curtis Dyreson and Johann Gamper", title = "Windowed $ p q$-grams for approximate joins of data-centric {XML}", journal = j-VLDB-J, volume = "21", number = "4", pages = "463--488", month = aug, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0254-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 20 14:56:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In data integration applications, a join matches elements that are common to two data sources. Since elements are represented slightly different in each source, an approximate join must be used to do the matching. For XML data, most existing approximate join strategies are based on some ordered tree matching technique, such as the tree edit distance. In data-centric XML, however, the sibling order is irrelevant, and two elements should match even if their subelement order varies. Thus, approximate joins for data-centric XML must leverage unordered tree matching techniques. 
This is computationally hard since the algorithms cannot rely on a predefined sibling order. In this paper, we give a solution for approximate joins based on unordered tree matching. The core of our solution are windowed $ p q$-grams which are small subtrees of a specific shape. We develop an efficient technique to generate windowed $ p q$-grams in a three-step process: sort the tree, extend the sorted tree with dummy nodes, and decompose the extended tree into windowed $ p q$-grams. The windowed $ p q$-grams distance between two trees is the number of $ p q$-grams that are in one tree decomposition only. We show that our distance is a pseudo-metric and empirically demonstrate that it effectively approximates the unordered tree edit distance. The approximate join using windowed $ p q$-grams can be efficiently implemented as an equality join on strings, which avoids the costly computation of the distance between every pair of input trees. Experiments with synthetic and real world data confirm the analytic results and show the effectiveness and efficiency of our technique.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhou:2012:ESM, author = "Xiangmin Zhou and Xiaofang Zhou and Lei Chen and Athman Bouguettaya", title = "Efficient subsequence matching over large video databases", journal = j-VLDB-J, volume = "21", number = "4", pages = "489--508", month = aug, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0255-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 20 14:56:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Video similarity matching has broad applications such as copyright detection, news tracking and commercial monitoring, etc.
Among these applications, one typical task is to detect the local similarity between two videos without the knowledge on positions and lengths of each matched subclip pair. However, most studies so far on video detection investigate the global similarity between two short clips using a pre-defined distance function. Although there are a few works on video subsequence detection, all these proposals fail to provide an effective query processing mechanism. In this paper, we first generalize the problem of video similarity matching. Then, a novel solution called consistent keyframe matching (CKM) is proposed to solve the problem of subsequence matching based on video segmentation. CKM is designed with two goals: (1) good scalability in terms of the query sequence length and the size of video database and (2) fast video subsequence matching in terms of processing time. Good scalability is achieved by employing a batch query paradigm, where keyframes sharing the same query space are summarized and ordered. As such, the redundancy of data access is eliminated, leading to much faster video query processing. Fast subsequence matching is achieved by comparing the keyframes of different video sequences. Specifically, a keyframe matching graph is first constructed and then divided into matched candidate subgraphs. We have evaluated our proposed approach over a very large real video database. Extensive experiments demonstrate the effectiveness and efficiency of our approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yildirim:2012:GSI, author = "Hilmi Yildirim and Vineet Chaoji and Mohammed J. 
Zaki", title = "{GRAIL}: a scalable index for reachability queries in very large graphs", journal = j-VLDB-J, volume = "21", number = "4", pages = "509--534", month = aug, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0256-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 20 14:56:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Given a large directed graph, rapidly answering reachability queries between source and target nodes is an important problem. Existing methods for reachability tradeoff indexing time and space versus query time performance. However, the biggest limitation of existing methods is that they do not scale to very large real-world graphs. We present a simple yet scalable reachability index, called GRAIL, that is based on the idea of randomized interval labeling and that can effectively handle very large graphs. Based on an extensive set of experiments, we show that while more sophisticated methods work better on small graphs, GRAIL is the only index that can scale to millions of nodes and edges. GRAIL has linear indexing time and space, and the query time ranges from constant time to being linear in the graph order and size. Our reference C++ implementations are open source and available for download at http://www.code.google.com/p/grail/.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xu:2012:EES, author = "Jia Xu and Zhenjie Zhang and Anthony K. 
Tung and Ge Yu", title = "Efficient and effective similarity search over probabilistic data based on {Earth Mover's Distance}", journal = j-VLDB-J, volume = "21", number = "4", pages = "535--559", month = aug, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0258-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 20 14:56:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Advances in geographical tracking, multimedia processing, information extraction, and sensor networks have created a deluge of probabilistic data. While similarity search is an important tool to support the manipulation of probabilistic data, it raises new challenges to traditional relational databases. The problem stems from the limited effectiveness of the distance metrics employed by existing database systems. On the other hand, several more complicated distance operators have proven their values for better distinguishing ability in specific probabilistic domains. In this paper, we discuss the similarity search problem with respect to Earth Mover's Distance (EMD). EMD is the most successful distance metric for probability distribution comparison but is an expensive operator as it has cubic time complexity. We present a new database indexing approach to answer EMD-based similarity queries, including range queries and $k$-nearest neighbor queries on probabilistic data. Our solution utilizes primal-dual theory from linear programming and employs a group of B$^+$ trees for effective candidate pruning. We also apply our filtering technique to the processing of continuous similarity queries, especially with applications to frame copy detection in real-time videos. 
Extensive experiments show that our proposals dramatically improve the usefulness and scalability of probabilistic data management.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2012:HOA, author = "Rui Zhang and Jianzhong Qi and Dan Lin and Wei Wang and Raymond Chi-Wing Wong", title = "A highly optimized algorithm for continuous intersection join queries over moving objects", journal = j-VLDB-J, volume = "21", number = "4", pages = "561--586", month = aug, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0259-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 20 14:56:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Given two sets of moving objects with nonzero extents, the continuous intersection join query reports every pair of intersecting objects, one from each of the two moving object sets, for every timestamp. This type of queries is important for a number of applications, e.g., in the multi-billion dollar computer game industry, massively multiplayer online games like World of Warcraft need to monitor the intersection among players' attack ranges and render players' interaction in real time. The computational cost of a straightforward algorithm or an algorithm adapted from another query type is prohibitive, and answering the query in real time poses a great challenge. Those algorithms compute the query answer for either too long or too short a time interval, which results in either a very large computation cost per answer update or too frequent answer updates, respectively. This observation motivates us to optimize the query processing in the time dimension. In this study, we achieve this optimization by introducing the new concept of time-constrained (TC) processing. 
Further, TC processing enables a set of effective improvement techniques on traditional intersection join algorithms. Finally, we provide a method to find the optimal value for an important parameter required in our technique, the maximum update interval. As a result, we achieve a highly optimized algorithm for processing continuous intersection join queries on moving objects. With a thorough experimental study, we show that our algorithm outperforms the best adapted existing solution by several orders of magnitude. We also validate the accuracy of our cost model and its effectiveness in optimizing the performance.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lehner:2012:SSL, author = "Wolfgang Lehner and Michael J. Franklin", title = "Special section on large-scale analytics", journal = j-VLDB-J, volume = "21", number = "5", pages = "587--588", month = oct, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0291-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 22 09:44:31 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wolf:2012:OSM, author = "Joel Wolf and Andrey Balmin and Deepak Rajan and Kirsten Hildrum and Rohit Khandekar and Sujay Parekh and Kun-Lung Wu and Rares Vernica", title = "On the optimization of schedules for {MapReduce} workloads in the presence of shared scans", journal = j-VLDB-J, volume = "21", number = "5", pages = "589--609", month = oct, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0279-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = 
"1066-8888", bibdate = "Mon Oct 22 09:44:31 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We consider MapReduce clusters designed to support multiple concurrent jobs, concentrating on environments in which the number of distinct datasets is modest relative to the number of jobs. In such scenarios, many individual datasets are likely to be scanned concurrently by multiple Map phase jobs. As has been noticed previously, this scenario provides an opportunity for Map phase jobs to cooperate, sharing the scans of these datasets, and thus reducing the costs of such scans. Our paper has three main contributions over previous work. First, we present a novel and highly general method for sharing scans and thus amortizing their costs. This concept, which we call cyclic piggybacking, has a number of advantages over the more traditional batching scheme described in the literature. Second, we notice that the various subjobs generated in this manner can be assumed in an optimal schedule to respect a natural chain precedence ordering. Third, we describe a significant but natural generalization of the recently introduced FLEX scheduler for optimizing schedules within the context of this cyclic piggybacking paradigm, which can be tailored to a variety of cost metrics. Such cost metrics include average response time, average stretch, and any minimax-type metric--a total of 11 separate and standard metrics in all. Moreover, most of this carries over in the more general case of overlapping rather than identical datasets as well, employing what we will call semi-shared scans. In such scenarios, chain precedence is replaced by arbitrary precedence, but we can still handle 8 of the original 11 metrics. The overall approach, including both cyclic piggybacking and the FLEX scheduling generalization, is called CIRCUMFLEX. We describe some practical implementation strategies. 
And we evaluate the performance of CIRCUMFLEX via a variety of simulation and real benchmark experiments.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhou:2012:SPD, author = "Jingren Zhou and Nicolas Bruno and Ming-Chuan Wu and Per-{\AA}ke Larson and Ronnie Chaiken and Darren Shakib", title = "{SCOPE}: parallel databases meet {MapReduce}", journal = j-VLDB-J, volume = "21", number = "5", pages = "611--636", month = oct, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0280-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 22 09:44:31 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Companies providing cloud-scale data services have increasing needs to store and analyze massive data sets, such as search logs, click streams, and web graph data. For cost and performance reasons, processing is typically done on large clusters of tens of thousands of commodity machines. Such massive data analysis on large clusters presents new opportunities and challenges for developing a highly scalable and efficient distributed computation system that is easy to program and supports complex system optimization to maximize performance and reliability. In this paper, we describe a distributed computation system, Structured Computations Optimized for Parallel Execution (Scope), targeted for this type of massive data analysis. Scope combines benefits from both traditional parallel databases and MapReduce execution engines to allow easy programmability and deliver massive scalability and high performance through advanced optimization. 
Similar to parallel databases, the system has a SQL-like declarative scripting language with no explicit parallelism, while being amenable to efficient parallel execution on large clusters. An optimizer is responsible for converting scripts into efficient execution plans for the distributed computation engine. A physical execution plan consists of a directed acyclic graph of vertices. Execution of the plan is orchestrated by a job manager that schedules execution on available machines and provides fault tolerance and recovery, much like MapReduce systems. Scope is being used daily for a variety of data analysis and data mining applications over tens of thousands of machines at Microsoft, powering Bing, and other online services.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kang:2012:GEA, author = "U. Kang and Hanghang Tong and Jimeng Sun and Ching-Yung Lin and Christos Faloutsos", title = "{{\tt gbase}}: an efficient analysis platform for large graphs", journal = j-VLDB-J, volume = "21", number = "5", pages = "637--650", month = oct, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0283-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 22 09:44:31 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Graphs appear in numerous applications including cyber security, the Internet, social networks, protein networks, recommendation systems, citation networks, and many more. Graphs with millions or even billions of nodes and edges are common-place. How to store such large graphs efficiently? What are the core operations/queries on those graph? How to answer the graph queries quickly? We propose Gbase, an efficient analysis platform for large graphs. 
The key novelties lie in (1) our storage and compression scheme for a parallel, distributed settings and (2) the carefully chosen graph operations and their efficient implementations. We designed and implemented an instance of Gbase using MapReduce\slash Hadoop. Gbase provides a parallel indexing mechanism for graph operations that both saves storage space, as well as accelerates query responses. We run numerous experiments on real and synthetic graphs, spanning billions of nodes and edges, and we show that our proposed Gbase is indeed fast, scalable, and nimble, with significant savings in space and time.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tran:2012:CMP, author = "Thanh T. Tran and Liping Peng and Yanlei Diao and Andrew McGregor and Anna Liu", title = "{CLARO}: modeling and processing uncertain data streams", journal = j-VLDB-J, volume = "21", number = "5", pages = "651--676", month = oct, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0261-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 22 09:44:31 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Uncertain data streams, where data are incomplete and imprecise, have been observed in many environments. Feeding such data streams to existing stream systems produces results of unknown quality, which is of paramount concern to monitoring applications. In this paper, we present the CLARO system that supports stream processing for uncertain data naturally captured using continuous random variables. CLARO employs a unique data model that is flexible and allows efficient computation. Built on this model, we develop evaluation techniques for relational operators by exploring statistical theory and approximation. 
We also consider query planning for complex queries given an accuracy requirement. Evaluation results show that our techniques can achieve high performance while satisfying accuracy requirements and outperform state-of-the-art sampling methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Helmer:2012:MSS, author = "Sven Helmer and Nikolaus Augsten and Michael B{\"o}hlen", title = "Measuring structural similarity of semistructured data based on information-theoretic approaches", journal = j-VLDB-J, volume = "21", number = "5", pages = "677--702", month = oct, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0263-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 22 09:44:31 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We propose and experimentally evaluate different approaches for measuring the structural similarity of semistructured documents based on information-theoretic concepts. Common to all approaches is a two-step procedure: first, we extract and linearize the structural information from documents, and then, we use similarity measures that are based on, respectively, Kolmogorov complexity and Shannon entropy to determine the distance between the documents. 
Compared to other approaches, we are able to achieve a linear run-time complexity and demonstrate in an experimental evaluation that the results of our technique in terms of clustering quality are on a par with or even better than those of other, slower approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Cheema:2012:EPS, author = "Muhammad Aamir Cheema and Wenjie Zhang and Xuemin Lin and Ying Zhang", title = "Efficiently processing snapshot and continuous reverse $k$ nearest neighbors queries", journal = j-VLDB-J, volume = "21", number = "5", pages = "703--728", month = oct, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0265-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 22 09:44:31 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Given a set of objects and a query $q$, a point $p$ is called the reverse $k$ nearest neighbor (R$k$NN) of $q$ if $q$ is one of the $k$ closest objects of $p$. In this paper, we introduce the concept of influence zone that is the area such that every point inside this area is the R$k$NN of $q$ and every point outside this area is not the R$k$NN. The influence zone has several applications in location-based services, marketing and decision support systems. It can also be used to efficiently process R$k$NN queries. First, we present efficient algorithm to compute the influence zone. Then, based on the influence zone, we present efficient algorithms to process R$k$NN queries that significantly outperform existing best-known techniques for both the snapshot and continuous R$k$NN queries. We also present a detailed theoretical analysis to analyze the area of the influence zone and IO costs of our R$k$NN processing algorithms. 
Our experiments demonstrate the accuracy of our theoretical analysis. This paper is an extended version of our previous work (Cheema et al. in Proceedings of ICDE, pp. 577--588, 2011). We make the following new contributions in this extended version: (1) we conduct a rigorous complexity analysis and show that the complexity of one of our proposed algorithms in Cheema et al. (Proceedings of ICDE, pp. 577--588, 2011) can be reduced from $ O(m^2) $ to $ O(k m) $ where $ m > k $ is the number of objects used to compute the influence zone, (2) we show that our techniques can be applied to dimensionality higher than two, and (3) we present efficient techniques to handle data updates.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zheng:2012:SQP, author = "Kai Zheng and Xiaofang Zhou and Pui Cheong Fung and Kexin Xie", title = "Spatial query processing for fuzzy objects", journal = j-VLDB-J, volume = "21", number = "5", pages = "729--751", month = oct, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0266-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 22 09:44:31 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Range and nearest neighbor queries are the most common types of spatial queries, which have been investigated extensively in the last decades due to its broad range of applications. In this paper, we study this problem in the context of fuzzy objects that have indeterministic boundaries. Fuzzy objects play an important role in many areas, such as biomedical image databases and GIS communities. Existing research on fuzzy objects mainly focuses on modeling basic fuzzy object types and operations, leaving the processing of more advanced queries largely untouched. 
In this paper, we propose two new kinds of spatial queries for fuzzy objects, namely single threshold query and continuous threshold query, to determine the query results which qualify at a certain probability threshold and within a probability interval, respectively. For efficient single threshold query processing, we optimize the classical R-tree-based search algorithm by deriving more accurate approximations for the distance function between fuzzy objects and the query object. To enhance the performance of continuous threshold queries, effective pruning rules are developed to reduce the search space and speed up the candidate refinement process. The efficiency of our proposed algorithms as well as the optimization techniques is verified with an extensive set of experiments using both synthetic and real datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2012:MFS, author = "Jianzhong Li and Zhaonian Zou and Hong Gao", title = "Mining frequent subgraphs over uncertain graph databases under probabilistic semantics", journal = j-VLDB-J, volume = "21", number = "6", pages = "753--777", month = dec, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0268-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 5 08:04:46 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Frequent subgraph mining has been extensively studied on certain graph data. However, uncertainty is intrinsic in graph data in practice, but there is very few work on mining uncertain graph data. This paper focuses on mining frequent subgraphs over uncertain graph data under the probabilistic semantics. 
Specifically, a measure called $ \varphi $-frequent probability is introduced to evaluate the degree of recurrence of subgraphs. Given a set of uncertain graphs and two real numbers $ 0 < \varphi, \tau < 1$, the goal is to quickly find all subgraphs with $ \varphi $-frequent probability at least $ \tau $. Due to the NP-hardness of the problem and to the \#P-hardness of computing the $ \varphi $-frequent probability of a subgraph, an approximate mining algorithm is proposed to produce an $ (\varepsilon, \delta)$-approximate set $ \Pi $ of ``frequent subgraphs'', where $ 0 < \varepsilon < \tau $ is error tolerance, and $ 0 < \delta < 1$ is a confidence bound. The algorithm guarantees that (1) any frequent subgraph $S$ is contained in $ \Pi $ with probability at least $ ((1 - \delta) / 2)^s$, where $s$ is the number of edges in $S$; (2) any infrequent subgraph with $ \varphi $-frequent probability less than $ \tau - \varepsilon $ is contained in $ \Pi $ with probability at most $ \delta / 2$. The theoretical analysis shows that to obtain any frequent subgraph with probability at least $ 1 - \Delta $, the input parameter $ \delta $ of the algorithm must be set to at most $ 1 - 2 (1 - \Delta)^{1 / \ell_{\rm max}}$, where $ 0 < \Delta < 1$, and $ \ell_{\rm max}$ is the maximum number of edges in frequent subgraphs. Extensive experiments on real uncertain graph data verify that the proposed algorithm is practically efficient and has very high approximation quality. 
Moreover, the difference between the probabilistic semantics and the expected semantics on mining frequent subgraphs over uncertain graph data has been discussed in this paper for the first time.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Vergoulis:2012:ARS, author = "Thanasis Vergoulis and Theodore Dalamagas and Dimitris Sacharidis and Timos Sellis", title = "Approximate regional sequence matching for genomic databases", journal = j-VLDB-J, volume = "21", number = "6", pages = "779--795", month = dec, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0270-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 5 08:04:46 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Recent advances in computational biology have raised sequence matching requirements that result in new types of sequence database problems. In this work, we introduce an important class of such problems, the approximate regional sequence matching (ARSM) problem. Given a data and a pattern sequence, an ARSM result is an approximate occurrence of a region of the pattern in the data sequence under two conditions. First, the region must contain a predetermined area of the pattern sequence, termed core. Second, the allowable deviation between the region of the pattern and its occurrence in the data sequence depends on the length of the region. We propose the PS-ARSM method that processes holistically the regions of a pattern, taking advantage of their overlaps to efficiently identify the ARSM results. 
Its performance is evaluated with respect to existing techniques adapted to the ARSM problem.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wu:2012:FES, author = "Dingming Wu and Gao Cong and Christian S. Jensen", title = "A framework for efficient spatial web object retrieval", journal = j-VLDB-J, volume = "21", number = "6", pages = "797--822", month = dec, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0271-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 5 08:04:46 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The conventional Internet is acquiring a geospatial dimension. Web documents are being geo-tagged and geo-referenced objects such as points of interest are being associated with descriptive text documents. The resulting fusion of geo-location and documents enables new kinds of queries that take into account both location proximity and text relevancy. This paper proposes a new indexing framework for top-$k$ spatial text retrieval. The framework leverages the inverted file for text retrieval and the R-tree for spatial proximity querying. Several indexing approaches are explored within this framework. The framework encompasses algorithms that utilize the proposed indexes for computing location-aware as well as region-aware top-$k$ text retrieval queries, thus taking into account both text relevancy and spatial proximity to prune the search space. 
Results of empirical studies with an implementation of the framework demonstrate that the paper's proposal is capable of excellent performance.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Arenas:2012:QLB, author = "Marcelo Arenas and Jorge P{\'e}rez and Juan Reutter and Cristian Riveros", title = "Query language-based inverses of schema mappings: semantics, computation, and closure properties", journal = j-VLDB-J, volume = "21", number = "6", pages = "823--842", month = dec, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0272-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 5 08:04:46 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The inversion of schema mappings has been identified as one of the fundamental operators for the development of a general framework for metadata management. During the last few years, three alternative notions of inversion for schema mappings have been proposed (Fagin-inverse (Fagin, TODS 32(4), 25:1--25:53, 2007), quasi-inverse (Fagin et al., TODS 33(2), 11:1--11:52, 2008), and maximum recovery (Arenas et al., TODS 34(4), 22:1--22:48, 2009)). However, these notions lack some fundamental properties that limit their practical applicability: most of them are expressed in languages including features that are difficult to use in practice, some of these inverses are not guaranteed to exist for mappings specified with source-to-target tuple-generating dependencies (st-tgds), and it has been futile to search for a meaningful mapping language that is closed under any of these notions of inverse. In this paper, we develop a framework for the inversion of schema mappings that fulfills all of the above requirements. 
It is based on the notion of $ {\mathcal {C}}$-maximum recovery, for a query language $ {\mathcal {C}}$, a notion designed to generate inverse mappings that recover back only the information that can be retrieved with queries in $ {\mathcal {C}}$. By focusing on the language of conjunctive queries (CQ), we are able to find a mapping language that contains the class of st-tgds, is closed under CQ-maximum recovery, and for which the chase procedure can be used to exchange data efficiently. Furthermore, we show that our choices of inverse notion and mapping language are optimal, in the sense that choosing a more expressive inverse operator or mapping language causes the loss of these properties.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bravo:2012:CRX, author = "Loreto Bravo and James Cheney and Irini Fundulaki and Ricardo Segovia", title = "Consistency and repair for {XML} write-access control policies", journal = j-VLDB-J, volume = "21", number = "6", pages = "843--867", month = dec, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0273-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 5 08:04:46 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "XML access control policies involving updates may contain security flaws, here called inconsistencies, in which a forbidden operation may be simulated by performing a sequence of allowed operations. This article investigates the problem of deciding whether a policy is consistent, and if not, how its inconsistencies can be repaired. We consider total and partial policies expressed in terms of annotated schemas defining which operations are allowed or denied for the XML trees that are instances of the schema. 
We show that consistency is decidable in PTIME for such policies and that consistent partial policies can be extended to unique least-privilege consistent total policies. We also consider repair problems based on deleting privileges to restore consistency, show that finding minimal repairs is NP-complete, and give heuristics for finding repairs. Finally, we experimentally evaluate these algorithms in comparison with an exact approach based on answer-set programming.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chang:2012:EDD, author = "Lijun Chang and Jeffrey Xu Yu and Lu Qin and Hong Cheng and Miao Qiao", title = "The exact distance to destination in undirected world", journal = j-VLDB-J, volume = "21", number = "6", pages = "869--888", month = dec, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0274-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 5 08:04:46 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Shortest distance queries are essential not only in graph analysis and graph mining tasks but also in database applications, when a large graph needs to be dealt with. Such shortest distance queries are frequently issued by end-users or requested as a subroutine in real applications. For intensive queries on large graphs, it is impractical to compute shortest distances on-line from scratch, and impractical to materialize all-pairs shortest distances. In the literature, 2-hop distance labeling is proposed to index the all-pairs shortest distances. 
It assigns distance labels to vertices in a large graph in a pre-computing step off-line and then answers shortest distance queries on-line by making use of such distance labels, which avoids exhaustively traversing the large graph when answering queries. However, the existing algorithms to generate 2-hop distance labels are not scalable to large graphs. Finding an optimal 2-hop distance labeling is NP-hard, and heuristic algorithms may generate large size distance labels while still needing to pre-compute all-pairs shortest paths. In this paper, we propose a multi-hop distance labeling approach, which generates a subset of the 2-hop distance labels as index off-line. We can compute the multi-hop distance labels efficiently by avoiding pre-computing all-pairs shortest paths. In addition, our multi-hop distance labeling is small in size to be stored. To answer a shortest distance query between two vertices, we first generate the query-specific small set of 2-hop distance labels for the two vertices based on our multi-hop distance labels stored and compute the shortest distance between the two vertices based on the 2-hop distance labels generated on-line. We conducted extensive performance studies on large real graphs and confirmed the efficiency of our multi-hop distance labeling scheme.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Soh:2012:AEE, author = "Kheng Hong Soh and Ba Quan Truong and Sourav S. 
Bhowmick", title = "{ANDES}: efficient evaluation of {NOT}-twig queries in relational databases", journal = j-VLDB-J, volume = "21", number = "6", pages = "889--914", month = dec, year = "2012", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0275-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 5 08:04:46 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Despite a large body of work on XPath query processing in relational environment, systematic study of queries containing not-predicates have received little attention in the literature. Particularly, several XML supports of industrial-strength commercial RDBMS fail to efficiently evaluate such queries. In this paper, we present an efficient and novel strategy to evaluate NOT-twig queries in a tree-unaware relational environment. NOT-twig queries are XPath queries with ancestor--descendant and parent--child axis and contain one or more not-predicates. We propose a novel Dewey-based encoding scheme called Andes (ANcestor Dewey-based Encoding Scheme), which enables us to efficiently filter out elements satisfying a not-predicate by comparing their ancestor group identifiers. In this approach, a set of elements under the same common ancestor at a specific level in the XML tree is assigned same ancestor group identifier. Based on this scheme, we propose a novel SQL translation algorithm for NOT-twig query evaluation. Experiments carried out confirm that our proposed approach built on top of an off-the-shelf commercial RDBMS significantly outperforms state-of-the-art relational and native approaches. We also explore the query plans selected by a commercial relational optimizer to evaluate our translated queries in different input cardinality.
Such exploration further validates the performance benefits of Andes.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lehner:2013:SIB, author = "Wolfgang Lehner and Sunita Sarawagi", title = "Special issue on best papers of {VLDB 2011}", journal = j-VLDB-J, volume = "22", number = "1", pages = "1--2", month = feb, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0301-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jan 24 06:07:36 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tzoumas:2013:EAG, author = "Kostas Tzoumas and Amol Deshpande and Christian S. Jensen", title = "Efficiently adapting graphical models for selectivity estimation", journal = j-VLDB-J, volume = "22", number = "1", pages = "3--27", month = feb, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0293-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jan 24 06:07:36 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Query optimizers rely on statistical models that succinctly describe the underlying data. Models are used to derive cardinality estimates for intermediate relations, which in turn guide the optimizer to choose the best query execution plan. The quality of the resulting plan is highly dependent on the accuracy of the statistical model that represents the data. 
It is well known that small errors in the model estimates propagate exponentially through joins, and may result in the choice of a highly sub-optimal query execution plan. Most commercial query optimizers make the attribute value independence assumption: all attributes are assumed to be statistically independent. This reduces the statistical model of the data to a collection of one-dimensional synopses (typically in the form of histograms), and it permits the optimizer to estimate the selectivity of a predicate conjunction as the product of the selectivities of the constituent predicates. However, this independence assumption is more often than not wrong, and is considered to be the most common cause of sub-optimal query execution plans chosen by modern query optimizers. We take a step towards a principled and practical approach to performing cardinality estimation without making the independence assumption. By carefully using concepts from the field of graphical models, we are able to factor the joint probability distribution over all the attributes in the database into small, usually two-dimensional distributions, without a significant loss in estimation accuracy. We show how to efficiently construct such a graphical model from the database using only two-way join queries, and we show how to perform selectivity estimation in a highly efficient manner. We integrate our algorithms into the PostgreSQL DBMS. Experimental results indicate that estimation errors can be greatly reduced, leading to orders of magnitude more efficient query execution plans in many cases. 
Optimization time is kept in the range of tens of milliseconds, making this a practical approach for industrial-strength query optimizers.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Minhas:2013:RTH, author = "Umar Farooq Minhas and Shriram Rajagopalan and Brendan Cully and Ashraf Aboulnaga and Kenneth Salem and Andrew Warfield", title = "{RemusDB}: transparent high availability for database systems", journal = j-VLDB-J, volume = "22", number = "1", pages = "29--45", month = feb, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0294-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jan 24 06:07:36 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we present a technique for building a high-availability (HA) database management system (DBMS). The proposed technique can be applied to any DBMS with little or no customization, and with reasonable performance overhead. Our approach is based on Remus, a commodity HA solution implemented in the virtualization layer, that uses asynchronous virtual machine state replication to provide transparent HA and failover capabilities. We show that while Remus and similar systems can protect a DBMS, database workloads incur a performance overhead of up to 32\% as compared to an unprotected DBMS. We identify the sources of this overhead and develop optimizations that mitigate the problems. We present an experimental evaluation using two popular database systems and industry standard benchmarks showing that for certain workloads, our optimized approach provides fast failover ($ \leq 3 $ s of downtime) with low performance overhead when compared to an unprotected DBMS. 
Our approach provides a practical means for existing, deployed database systems to be made more reliable with a minimum of risk, cost, and effort. Furthermore, this paper invites new discussion about whether the complexity of HA is best implemented within the DBMS, or as a service by the infrastructure below it.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Furche:2013:OLS, author = "Tim Furche and Georg Gottlob and Giovanni Grasso and Christian Schallhart and Andrew Sellers", title = "{OXPath}: a language for scalable data extraction, automation, and crawling on the deep web", journal = j-VLDB-J, volume = "22", number = "1", pages = "47--72", month = feb, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0286-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jan 24 06:07:36 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The evolution of the web has outpaced itself: A growing wealth of information and increasingly sophisticated interfaces necessitate automated processing, yet existing automation and data extraction technologies have been overwhelmed by this very growth. To address this trend, we identify four key requirements for web data extraction, automation, and (focused) web crawling: (1) interact with sophisticated web application interfaces, (2) precisely capture the relevant data to be extracted, (3) scale with the number of visited pages, and (4) readily embed into existing web technologies. We introduce OXPath as an extension of XPath for interacting with web applications and extracting data thus revealed --- matching all the above requirements. OXPath's page-at-a-time evaluation guarantees memory use independent of the number of visited pages, yet remains polynomial in time. 
We experimentally validate the theoretical complexity and demonstrate that OXPath's resource consumption is dominated by page rendering in the underlying browser. With an extensive study of sublanguages and properties of OXPath, we pinpoint the effect of specific features on evaluation performance. Our experiments show that OXPath outperforms existing commercial and academic data extraction tools by a wide margin.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Curino:2013:ADS, author = "Carlo Curino and Hyun Jin Moon and Alin Deutsch and Carlo Zaniolo", title = "Automating the database schema evolution process", journal = j-VLDB-J, volume = "22", number = "1", pages = "73--98", month = feb, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0302-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jan 24 06:07:36 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Supporting database schema evolution represents a long-standing challenge of practical and theoretical importance for modern information systems. In this paper, we describe techniques and systems for automating the critical tasks of migrating the database and rewriting the legacy applications. In addition to labor saving, the benefits delivered by these advances are many and include reliable prediction of outcome, minimization of downtime, system-produced documentation, and support for archiving, historical queries, and provenance. The PRISM/PRISM++ system delivers these benefits, by solving the difficult problem of automating the migration of databases and the rewriting of queries and updates. In this paper, we present the PRISM/PRISM++ system and the novel technology that made it possible. 
In particular, we focus on the difficult and previously unsolved problem of supporting legacy queries and updates under schema and integrity constraints evolution. The PRISM/PRISM++ approach consists in providing the users with a set of SQL-based Schema Modification Operators (SMOs), which describe how the tables in the old schema are modified into those in the new schema. In order to support updates, SMOs are extended with integrity constraints modification operators. By using recent results on schema mapping, the paper (i) characterizes the impact on integrity constraints of structural schema changes, (ii) devises representations that enable the rewriting of updates, and (iii) develops a unified approach for query and update rewriting under constraints. We complement the system with two novel tools: the first automatically collects and provides statistics on schema evolution histories, whereas the second derives equivalent sequences of SMOs from the migration scripts that were used for schema upgrades. These tools were used to produce an extensive testbed containing 15 evolution histories of scientific databases and web information systems, providing over 100 years of aggregate evolution histories and almost 2,000 schema evolution steps.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ramesh:2013:KSF, author = "Aditya Ramesh and S.
Sudarshan and Purva Joshi and Manisha Naik Gaonkar", title = "Keyword search on form results", journal = j-VLDB-J, volume = "22", number = "1", pages = "99--123", month = feb, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0287-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jan 24 06:07:36 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In recent years there has been a good deal of research in the area of keyword search on structured and semistructured data. Most of this body of work has a significant limitation in the context of enterprise data, since it ignores the application code that has often been carefully designed to present data in a meaningful fashion to users. In this work, we consider how to perform keyword search on enterprise applications, which provide a number of forms that can take parameters; parameters may be explicit, or implicit such as the identifier of the user. In the context of such applications, the goal of keyword search is, given a set of keywords, to retrieve forms along with corresponding parameter values, such that result of each retrieved form executed on the corresponding retrieved parameter values will contain the specified keywords. Some earlier work in this area was based on creating keyword indices on form results, but there are problems in maintaining such indices in the face of updates. In contrast, we propose techniques based on creating inverted SQL queries from the SQL queries in the forms. Unlike earlier work, our techniques do not require any special purpose indices and instead make use of standard text indices supported by most database systems. 
We have implemented our techniques and show that keyword search can run at reasonable speeds even on large databases with a significant number of forms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Dieng:2013:MFC, author = "Cheikh Tidiane Dieng and Tao-Yuan Jen and Dominique Laurent and Nicolas Spyratos", title = "Mining frequent conjunctive queries using functional and inclusion dependencies", journal = j-VLDB-J, volume = "22", number = "2", pages = "125--150", month = apr, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0277-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 29 15:54:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We address the issue of mining frequent conjunctive queries in a relational database, a problem known to be intractable even for conjunctive queries over a single table. In this article, we show that mining frequent projection-selection-join queries becomes tractable if joins are performed along keys and foreign keys, in a database satisfying functional and inclusion dependencies, under certain restrictions. We note that these restrictions cover most practical cases, including databases operating over star schemas, snow-flake schemas and constellation schemas. In our approach, we define an equivalence relation over queries using a pre-ordering with respect to which the support is shown to be anti-monotonic. We propose a level-wise algorithm for computing all frequent queries by exploiting the fact that equivalent queries have the same support. 
We report on experiments showing that, in our context, mining frequent projection-selection-join queries is indeed tractable, even for large data sets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tozun:2013:SDB, author = "Pinar T{\"o}z{\"u}n and Ippokratis Pandis and Ryan Johnson and Anastasia Ailamaki", title = "Scalable and dynamically balanced shared-everything {OLTP} with physiological partitioning", journal = j-VLDB-J, volume = "22", number = "2", pages = "151--175", month = apr, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0278-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 29 15:54:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Scaling the performance of shared-everything transaction processing systems to highly parallel multicore hardware remains a challenge for database system designers. Recent proposals alleviate locking and logging bottlenecks in the system, leaving page latching as the next potential problem. To tackle the page latching problem, we propose physiological partitioning (PLP). PLP applies logical-only partitioning, maintaining the desired properties of shared-everything designs, and introduces a multi-rooted B+Tree index structure (MRBTree) that enables the partitioning of the accesses at the physical page level. Logical partitioning and MRBTrees together ensure that all accesses to a given index page come from a single thread and, hence, can be entirely latch free; an extended design makes heap page accesses thread private as well. Moreover, MRBTrees offer an infrastructure for easy repartitioning and allow us to have a lightweight dynamic load balancing mechanism (DLB) on top of PLP.
Profiling a PLP prototype running on different multicore machines shows that it acquires 85 and 68\% fewer contentious critical sections, respectively, than an optimized conventional design and one based on logical-only partitioning. PLP also improves performance up to almost 50\% over the existing systems, while DLB enhances the system with rapid and robust behavior in both detecting and handling load imbalances.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wu:2013:SXS, author = "Xiaoying Wu and Dimitri Theodoratos", title = "A survey on {XML} streaming evaluation techniques", journal = j-VLDB-J, volume = "22", number = "2", pages = "177--202", month = apr, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0281-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 29 15:54:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "XML is currently the most popular format for exchanging and representing data on the web. It is used in various applications and for different types of data including structured, semistructured, and unstructured heterogeneous data types. During the period, XML was establishing itself, data streaming applications have gained increased attention and importance. Because of these developments, the querying and efficient processing of XML streams has become a central issue. In this study, we survey the state of the art in XML streaming evaluation techniques. We focus on both the streaming evaluation of XPath expressions and of XQuery queries. We classify the XPath streaming evaluation approaches according to the main data structure used for the evaluation into three categories: automaton-based approach, array-based approach, and stack-based approach.
We review, analyze, and compare the major techniques proposed for each approach. We also review multiple query streaming evaluation techniques. For the XQuery streaming evaluation problem, we identify and discuss four processing paradigms adopted by the existing XQuery stream query engines: the transducer-based paradigm, the algebra-based paradigm, the automata-algebra paradigm, and the pull-based paradigm. In addition, we review optimization techniques for XQuery streaming evaluation. We address the problem of optimizing XQuery streaming evaluation as a buffer optimization problem. For all techniques discussed, we describe the research issues and the proposed algorithms and we compare them with other relevant suggested techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lu:2013:ADU, author = "Wentian Lu and Gerome Miklau and Neil Immerman", title = "Auditing a database under retention policies", journal = j-VLDB-J, volume = "22", number = "2", pages = "203--228", month = apr, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0282-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 29 15:54:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Auditing the changes to a database is critical for identifying malicious behavior, maintaining data quality, and improving system performance. But an accurate audit log is an historical record of the past that can also pose a serious threat to privacy. Policies that limit data retention conflict with the goal of accurate auditing, and data owners have to carefully balance the need for policy compliance with the goal of accurate auditing. 
In this paper, we provide a framework for auditing the changes to a database system while respecting data retention policies. Our framework includes an historical data model that supports flexible audit queries, along with a language for retention policies that can hide individual attribute values or remove entire tuples from the history. Under retention policies, the audit history is partially incomplete. Thus, audit queries on the protected history can include imprecise results. We propose two different models (a tuple-independent model and a tuple-correlated model) for formalizing the meaning of audit queries. We implement policy application and query answering efficiently in a standard relational system and characterize the cases where accurate auditing can be achieved under retention restrictions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yuan:2013:LLB, author = "Dayu Yuan and Prasenjit Mitra", title = "{Lindex}: a lattice-based index for graph databases", journal = j-VLDB-J, volume = "22", number = "2", pages = "229--252", month = apr, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0284-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 29 15:54:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Subgraph querying has wide applications in various fields such as cheminformatics and bioinformatics. Given a query graph, q, a subgraph-querying algorithm retrieves all graphs, D ( q ), which have q as a subgraph, from a graph database, D. Subgraph querying is costly because it uses subgraph isomorphism tests, which are NP-complete. Graph indices are commonly used to improve the performance of subgraph querying in graph databases. 
Subgraph-querying algorithms first construct a candidate answer set by filtering out a set of false answers and then verify each candidate graph using subgraph isomorphism tests. To build graph indices, various kinds of substructure (subgraph, subtree, or path) features have been proposed with the goal of maximizing the filtering rate. Each of them works with a specifically designed index structure, for example, discriminative and frequent subgraph features work with gIndex, $ \delta $-TCFG features work with FG-index, etc. We propose Lindex, a graph index, which indexes subgraphs contained in database graphs. Nodes in Lindex represent key-value pairs where the key is a subgraph in a database and the value is a list of database graphs containing the key. We propose two heuristics that are used in the construction of Lindex that allows us to determine answers to subgraph queries conducting less subgraph isomorphism tests. Consequently, Lindex improves subgraph-querying efficiency. In addition, Lindex is compatible with any choice of features. Empirically, we demonstrate that Lindex used in conjunction with subgraph indexing features proposed in previous works outperforms other specifically designed index structures. As a novel index structure, Lindex (1) is effective in filtering false graphs, (2) provides fast index lookups, (3) is fast with respect to index construction and maintenance, and (4) can be constructed using any set of substructure index features. These four properties result in a fast and scalable subgraph-querying infrastructure. We substantiate the benefits of Lindex and its disk-resident variation Lindex+ theoretically and empirically.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Song:2013:CDH, author = "Shaoxu Song and Lei Chen and Philip S.
Yu", title = "Comparable dependencies over heterogeneous data", journal = j-VLDB-J, volume = "22", number = "2", pages = "253--274", month = apr, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0285-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 29 15:54:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "To study the data dependencies over heterogeneous data in dataspaces, we define a general dependency form, namely comparable dependencies (CDS), which specifies constraints on comparable attributes. It covers the semantics of a broad class of dependencies in databases, including functional dependencies (FDS), metric functional dependencies (MFDS), and matching dependencies (MDS). As we illustrated, comparable dependencies are useful in real practice of dataspaces, such as semantic query optimization. Due to heterogeneous data in dataspaces, the first question, known as the validation problem, is to tell whether a dependency (almost) holds in a data instance. Unfortunately, as we proved, the validation problem with certain error or confidence guarantee is generally hard. In fact, the confidence validation problem is also NP-hard to approximate to within any constant factor. Nevertheless, we develop several approaches for efficient approximation computation, such as greedy and randomized approaches with an approximation bound on the maximum number of violations that an object may introduce. Finally, through an extensive experimental evaluation on real data, we verify the superiority of our methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Qiao:2013:CWC, author = "Miao Qiao and Hong Cheng and Lu Qin and Jeffrey Xu Yu and Philip S. 
Yu and Lijun Chang", title = "Computing weight constraint reachability in large networks", journal = j-VLDB-J, volume = "22", number = "3", pages = "275--294", month = jun, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0288-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:10 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Reachability is a fundamental problem on large-scale networks emerging nowadays in various application domains, such as social networks, communication networks, biological networks, road networks, etc. It has been studied extensively. However, little existing work has studied reachability with realistic constraints imposed on graphs with real-valued edge or node weights. In fact, such weights are very common in many real-world networks, for example, the bandwidth of a link in communication networks, the reliability of an interaction between two proteins in PPI networks, and the handling capacity of a warehouse/storage point in a distribution network. In this paper, we formalize a new yet important reachability query in weighted undirected graphs, called weight constraint reachability (WCR) query that asks: is there a path between nodes a and b, on which each real-valued edge (or node) weight satisfies a range constraint. We discover an interesting property of WCR, based on which, we design a novel edge-based index structure to answer the WCR query in $ O(1) $ time. Furthermore, we consider the case when the index cannot entirely fit in the memory, which can be very common for emerging massive networks. An I/O-efficient index is proposed, which provides constant I/O (precisely four I/Os) query time with $ O(|V| \log |V|) $ disk-based index size.
Extensive experimental studies on both real and synthetic datasets demonstrate the efficiency and scalability of our solutions in answering the WCR query.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Toyoda:2013:PDD, author = "Machiko Toyoda and Yasushi Sakurai and Yoshiharu Ishikawa", title = "Pattern discovery in data streams under the time warping distance", journal = j-VLDB-J, volume = "22", number = "3", pages = "295--318", month = jun, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0289-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:10 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Subsequence matching is a basic problem in the field of data stream mining. In recent years, there has been significant research effort spent on efficiently finding subsequences similar to a query sequence. Another challenging issue in relation to subsequence matching is how we identify common local patterns when both sequences are evolving. This problem arises in trend detection, clustering, and outlier detection. Dynamic time warping (DTW) is often used for subsequence matching and is a powerful similarity measure. However, the straightforward method using DTW incurs a high computation cost for this problem. In this paper, we propose a one-pass algorithm, CrossMatch, that achieves the above goal. CrossMatch addresses two important challenges: (1) how can we identify common local patterns efficiently without any omission? (2) how can we find common local patterns in data stream processing? 
To tackle these challenges, CrossMatch incorporates three ideas: (1) a scoring function, which computes the DTW distance indirectly to reduce the computation cost, (2) a position matrix, which stores starting positions to keep track of common local patterns in a streaming fashion, and (3) a streaming algorithm, which identifies common local patterns efficiently and outputs them on the fly. We provide a theoretical analysis and prove that our algorithm does not sacrifice accuracy. Our experimental evaluation and case studies show that CrossMatch can incrementally discover common local patterns in data streams within constant time (per update) and space.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xie:2013:UDV, author = "Xike Xie and Reynold Cheng and Man Lung Yiu and Liwen Sun and Jinchuan Chen", title = "{UV-diagram}: a {Voronoi} diagram for uncertain spatial databases", journal = j-VLDB-J, volume = "22", number = "3", pages = "319--344", month = jun, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0290-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:10 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The Voronoi diagram is an important technique for answering nearest-neighbor queries for spatial databases. We study how the Voronoi diagram can be used for uncertain spatial data, which are inherent in scientific and business applications. Specifically, we propose the Uncertain-Voronoi diagram (or UV-diagram ), which divides the data space into disjoint ``UV-partitions''. Each UV-partition P is associated with a set S of objects, such that any point q located in P has the set S as its nearest neighbor with nonzero probabilities. 
The UV-diagram enables queries that return objects with nonzero chances of being the nearest neighbor (NN) of a given point q. It supports ``continuous nearest-neighbor search'', which refreshes the set of NN objects of q, as the position of q changes. It also allows the analysis of nearest-neighbor information, for example, to find out the number of objects that are the nearest neighbors of any point in a given area. A UV-diagram requires exponential construction and storage costs. To tackle these problems, we devise an alternative representation of a UV-diagram, by using a set of UV-cells. A UV-cell of an object o is the extent e for which o can be the nearest neighbor of any point $ q \in e $. We study how to speed up the derivation of UV-cells by considering its nearby objects. We also use the UV-cells to design the UV-index, which supports different queries, and can be constructed in polynomial time. We have performed extensive experiments on both real and synthetic data to validate the efficiency of our approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhu:2013:HEQ, author = "Yuanyuan Zhu and Lu Qin and Jeffrey Xu Yu and Yiping Ke and Xuemin Lin", title = "High efficiency and quality: large graphs matching", journal = j-VLDB-J, volume = "22", number = "3", pages = "345--368", month = jun, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0292-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:10 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Graph matching plays an essential role in many real applications. In this paper, we study how to match two large graphs by maximizing the number of matched edges, which is known as maximum common subgraph matching and is NP-hard.
To find exact matching, it cannot handle a graph with more than 30 nodes. To find an approximate matching, the quality can be very poor. We propose a novel two-step approach that can efficiently match two large graphs over thousands of nodes with high matching quality. In the first step, we propose an anchor-selection/expansion approach to compute a good initial matching. In the second step, we propose a new approach to refine the initial matching. We give the optimality of our refinement and discuss how to randomly refine the matching with different combinations. We further show how to extend our solution to handle labeled graphs. We conducted extensive testing using real and synthetic datasets and report our findings in this paper.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Baca:2013:OEG, author = "Radim Baca and Michal Kr{\'a}tk{\'y} and Tok Wang Ling and Jiaheng Lu", title = "Optimal and efficient generalized twig pattern processing: a combination of preorder and postorder filterings", journal = j-VLDB-J, volume = "22", number = "3", pages = "369--393", month = jun, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0295-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:10 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Searching for occurrences of a twig pattern query (TPQ) in an XML document is a core task of all XML database query languages. The generalized twig pattern (GTP) extends the TPQ model to include semantics related to output nodes, optional nodes, and boolean expressions which are part of the XQuery language.
Preorder filtering holistic algorithms such as TwigStack represent a significant class of TPQ processing approaches with a linear worst-case I/O complexity with respect to the sum of the input and output sizes for some query classes. Another important class of holistic approaches is represented by postorder filtering holistic algorithms such as ${\rm Twig}^2$Stack which introduced a linear output enumeration time with respect to the result size. In this article, we introduce a holistic algorithm called GTPStack which is the first approach capable of processing a GTP with a linear worst-case I/O complexity with respect to the GTP result size. This is achieved by using a combination of the preorder and postorder filterings before storing nodes in an intermediate storage. Additionally, another contribution of this article is an introduction of a new perspective of holistic algorithm optimality. We show that the optimality depends not only on a query class but also on XML document characteristics. This new view on the optimality extends the general knowledge about the type of queries for which the holistic algorithms are optimal. Moreover, it allows us to determine that GTPStack is optimal for any GTP when a specific XML document is considered. We present a comprehensive experimental study of the state-of-the-art holistic algorithms showing under which conditions GTPStack outperforms the other holistic approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Silva:2013:SQT, author = "Yasin N. Silva and Walid G. Aref and Per-Ake Larson and Spencer S. Pearson and Mohamed H.
Ali", title = "Similarity queries: their conceptual evaluation, transformations, and processing", journal = j-VLDB-J, volume = "22", number = "3", pages = "395--420", month = jun, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0296-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:10 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Many application scenarios can significantly benefit from the identification and processing of similarities in the data. Even though some work has been done to extend the semantics of some operators, for example join and selection, to be aware of data similarities, there has not been much study on the role and implementation of similarity-aware operations as first-class database operators. Furthermore, very little work has addressed the problem of evaluating and optimizing queries that combine several similarity operations. The focus of this paper is the study of similarity queries that contain one or multiple first-class similarity database operators such as Similarity Selection, Similarity Join, and Similarity Group-by. Particularly, we analyze the implementation techniques of several similarity operators, introduce a consistent and comprehensive conceptual evaluation model for similarity queries, and present a rich set of transformation rules to extend cost-based query optimization to the case of similarity queries.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Dindar:2013:MES, author = "Nihal Dindar and Nesime Tatbul and Ren{\'e}e J. Miller and Laura M. 
Haas and Irina Botan", title = "Modeling the execution semantics of stream processing engines with {SECRET}", journal = j-VLDB-J, volume = "22", number = "4", pages = "421--446", month = aug, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0297-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:16 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "There are many academic and commercial stream processing engines (SPEs) today, each of them with its own execution semantics. This variation may lead to seemingly inexplicable differences in query results. In this paper, we present SECRET, a model of the behavior of SPEs. SECRET is a descriptive model that allows users to analyze the behavior of systems and understand the results of window-based queries (with time- and tuple-based windows) for a broad range of heterogeneous SPEs. The model is the result of extensive analysis and experimentation with several commercial and academic engines. In the paper, we describe the types of heterogeneity found in existing engines and show with experiments on real systems that our model can explain the key differences in windowing behavior.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Elghandour:2013:RXP, author = "Iman Elghandour and Ashraf Aboulnaga and Daniel C. 
Zilio and Calisto Zuzarte", title = "Recommending {XML} physical designs for {XML} databases", journal = j-VLDB-J, volume = "22", number = "4", pages = "447--470", month = aug, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0298-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:16 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Database systems employ physical structures such as indexes and materialized views to improve query performance, potentially by orders of magnitude. It is therefore important for a database administrator to choose the appropriate configuration of these physical structures for a given database. XML database systems are increasingly being used to manage semi-structured data, and XML support has been added to commercial database systems. In this paper, we address the problem of automatic physical design for XML databases, which is the process of automatically selecting the best set of physical structures for a database and a query workload. We focus on recommending two types of physical structures: XML indexes and relational materialized views of XML data. We present a design advisor for recommending XML indexes, one for recommending materialized views, and an integrated design advisor that recommends both indexes and materialized views. A key characteristic of our advisors is that they are tightly coupled with the query optimizer of the database system, and they rely on the optimizer for enumerating and evaluating physical designs. 
We have implemented our advisors in a prototype version of IBM DB2 V9, and we experimentally demonstrate the effectiveness of their recommendations using this implementation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mazuran:2013:EPD, author = "Mirjana Mazuran and Edoardo Serra and Carlo Zaniolo", title = "Extending the power of datalog recursion", journal = j-VLDB-J, volume = "22", number = "4", pages = "471--493", month = aug, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0299-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:16 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Supporting aggregates in recursive logic rules represents a very important problem for Datalog. To solve this problem, we propose a simple extension, called DatalogFS (Datalog extended with frequency support goals), that supports queries and reasoning about the number of distinct variable assignments satisfying given goals, or conjunctions of goals, in rules. This monotonic extension greatly enhances the power of Datalog, while preserving (i) its declarative semantics and (ii) its amenability to efficient implementation via differential fixpoint and other optimization techniques presented in the paper. Thus, DatalogFS enables the efficient formulation of queries that could not be expressed efficiently or could not be expressed at all in Datalog with stratified negation and aggregates. 
In fact, using a generalized notion of multiplicity called frequency, we show that diffusion models and page rank computations can be easily expressed and efficiently implemented using DatalogFS.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Galpin:2013:QAO, author = "Ixent Galpin and Alvaro A. Fernandes and Norman W. Paton", title = "{QoS}-aware optimization of sensor network queries", journal = j-VLDB-J, volume = "22", number = "4", pages = "495--517", month = aug, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0300-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:16 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The resource-constrained nature of mote-level wireless sensor networks (WSNs) poses challenges for the design of general-purpose sensor network query processors (SNQPs). Existing SNQPs tend to generate query execution plans (QEPs) that are selected on the basis of a fixed, implicit expectation, for example, that energy consumption should be kept as small as possible. However, in WSN applications, the same query may be subject to several, possibly conflicting, quality-of-service (QoS) expectations concomitantly (for example maximizing data acquisition rates subject to keeping energy consumption low). It is also not uncommon for the QoS expectations to change over the lifetime of a deployment (for example from low to high data acquisition rates). This paper describes optimization algorithms that respond to stated QoS expectations (about acquisition rate, delivery time, energy consumption and lifetime) when making routing, placement, and timing decisions for in-WSN query processing.
The paper shows experimentally that QoS-awareness offers significant benefits in responding to, and reconciling, diverse QoS expectations, thereby enabling QoS-aware SNQPs to generate efficient QEPs for a broader range of WSN applications than has hitherto been possible.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Deutch:2013:TQW, author = "Daniel Deutch and Tova Milo and Neoklis Polyzotis", title = "Top-$k$ queries over {Web} applications", journal = j-VLDB-J, volume = "22", number = "4", pages = "519--542", month = aug, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0303-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:16 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The core logic of web applications that suggest some particular service, such as online shopping, e-commerce etc., is typically captured by Business Processes (BPs). Among all the (maybe infinitely many) possible execution flows of a BP, analysts are often interested in identifying flows that are ``most important'', according to some weight metric. The goal of the present paper is to provide efficient algorithms for top-$k$ query evaluation over the possible executions of Business Processes, under some given weight function. Unique difficulties in top-$k$ analysis in this setting stem from (1) the fact that the number of possible execution flows of a given BP is typically very large, or even infinite in presence of recursion and (2) that the weights (e.g., likelihood, monetary cost, etc.) induced by actions performed during the execution (e.g., product purchase) may be inter-dependent (due to probabilistic dependencies, combined discount deals etc.).
We exemplify these difficulties, and overcome them to provide efficient algorithms for query evaluation where possible. We also describe in details an application prototype that we have developed for recommending optimal navigation in an online shopping web site that is based on our model and algorithms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gao:2013:OSD, author = "Jun Gao and Jeffrey Xu Yu and Ruoming Jin and Jiashuai Zhou and Tengjiao Wang and Dongqing Yang", title = "Outsourcing shortest distance computing with privacy protection", journal = j-VLDB-J, volume = "22", number = "4", pages = "543--559", month = aug, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0304-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:16 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the advent of cloud computing, it becomes desirable to outsource graphs into cloud servers to efficiently perform complex operations without compromising their sensitive information. In this paper, we take the shortest distance computation as a case to investigate the technique issues in outsourcing graph operations. We first propose a parameter-free, edge-based 2-HOP delegation security model (shorten as 2-HOP delegation model), which can greatly reduce the chances of the structural pattern attack and the graph reconstruction attack. We then transform the original graph into a link graph $ G_l $ kept locally and a set of outsourced graphs $ \mathcal G_o $. Our objectives include (i) ensuring each outsourced graph meeting the requirement of 2-HOP delegation model, (ii) making shortest distance queries be answered using $ G_l $ and $ \mathcal G_o $, (iii) minimizing the space cost of $ G_l $. 
We devise a greedy method to produce $ G_l $ and $ \mathcal G_o $, which can exactly answer shortest distance queries. We also develop an efficient transformation method to support approximate shortest distance answering under a given average additive error bound. The experimental results illustrate the effectiveness and efficiency of our method.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kalashnikov:2013:SEF, author = "Dmitri V. Kalashnikov", title = "{Super-EGO}: fast multi-dimensional similarity join", journal = j-VLDB-J, volume = "22", number = "4", pages = "561--585", month = aug, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-012-0305-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 17 17:37:16 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Efficient processing of high-dimensional similarity joins plays an important role for a wide variety of data-driven applications. In this paper, we consider $ \varepsilon $-join variant of the problem. Given two $d$-dimensional datasets and parameter $ \varepsilon $, the task is to find all pairs of points, one from each dataset that are within $ \varepsilon $ distance from each other. We propose a new $ \varepsilon $-join algorithm, called Super-EGO, which belongs to the EGO family of join algorithms. The new algorithm gains its advantage by using novel data-driven dimensionality re-ordering technique, developing a new EGO-strategy that more aggressively avoids unnecessary computation, as well as by developing a parallel version of the algorithm. We study the newly proposed Super-EGO algorithm on large real and synthetic datasets.
The empirical study demonstrates significant advantage of the proposed solution over the existing state of the art techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Brambilla:2013:SIS, author = "Marco Brambilla and Stefano Ceri and Alon Halevy", title = "Special issue on structured and crowd-sourced data on the {Web}", journal = j-VLDB-J, volume = "22", number = "5", pages = "587--588", month = oct, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0327-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Dec 16 16:57:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Goasdoue:2013:GTT, author = "Fran{\c{c}}ois Goasdou{\'e} and Konstantinos Karanasos and Yannis Katsis and Julien Leblay and Ioana Manolescu and Stamatis Zampetakis", title = "Growing triples on trees: an {XML--RDF} hybrid model for annotated documents", journal = j-VLDB-J, volume = "22", number = "5", pages = "589--613", month = oct, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0321-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Dec 16 16:57:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Since the beginning of the Semantic Web initiative, significant efforts have been invested in finding efficient ways to publish, store, and query metadata on the Web. RDF and SPARQL have become the standard data model and query language, respectively, to describe resources on the Web. 
Large amounts of RDF data are now available either as stand-alone datasets or as metadata over semi-structured (typically XML) documents. The ability to apply RDF annotations over XML data emphasizes the need to represent and query data and metadata simultaneously. We propose XR, a novel hybrid data model capturing the structural aspects of XML data and the semantics of RDF, also enabling us to reason about XML data. Our model is general enough to describe pure XML or RDF datasets, as well as RDF-annotated XML data, where any XML node can act as a resource. This data model comes with the XRQ query language that combines features of both XQuery and SPARQL. To demonstrate the feasibility of this hybrid XML-RDF data management setting, and to validate its interest, we have developed an XR platform on top of well-known data management systems for XML and RDF. In particular, the platform features several XRQ query processing algorithms, whose performance is experimentally compared.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Furche:2013:OKA, author = "Tim Furche and Georg Gottlob and Giovanni Grasso and Xiaonan Guo and Giorgio Orsi and Christian Schallhart", title = "The ontological key: automatically understanding and integrating forms to access the deep {Web}", journal = j-VLDB-J, volume = "22", number = "5", pages = "615--640", month = oct, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0323-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Dec 16 16:57:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Forms are our gates to the Web. They enable us to access the deep content of Web sites. 
Automatic form understanding provides applications, ranging from crawlers over meta-search engines to service integrators, with a key to this content. Yet, it has received little attention other than as a component in specific applications such as crawlers or meta-search engines. No comprehensive approach to form understanding exists, let alone one that produces rich models for semantic services or integration with linked open data. In this paper, we present opal, the first comprehensive approach to form understanding and integration. We identify form labeling and form interpretation as the two main tasks involved in form understanding. On both problems, opal advances the state of the art: For form labeling, it combines features from the text, structure, and visual rendering of a Web page. In extensive experiments on the ICQ and TEL-8 benchmarks and a set of 200 modern Web forms, opal outperforms previous approaches for form labeling by a significant margin. For form interpretation, opal uses a schema (or ontology) of forms in a given domain. Thanks to this domain schema, it is able to produce nearly perfect ($ > 97 $ \% accuracy in the evaluation domains) form interpretations. Yet, the effort to produce a domain schema is very low, as we provide a datalog-based template language that eases the specification of such schemata and a methodology for deriving a domain schema largely automatically from an existing domain ontology.
We demonstrate the value of opal's form interpretations through a light-weight form integration system that successfully translates and distributes master queries to hundreds of forms with no error, yet is implemented with only a handful of translation rules.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bozzon:2013:ESF, author = "Alessandro Bozzon and Marco Brambilla and Stefano Ceri and Davide Mazza", title = "Exploratory search framework for {Web} data sources", journal = j-VLDB-J, volume = "22", number = "5", pages = "641--663", month = oct, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0326-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Dec 16 16:57:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Exploratory search is an information seeking behavior where users progressively learn about one or more topics of interest; it departs quite radically from traditional keyword-based query paradigms, as it combines querying and browsing of resources, and covers activities such as investigating, evaluating, comparing, and synthesizing retrieved information. In most cases, such activities are enabled by a conceptual description of information in terms of entities and their semantic relationships. Customized Web applications, where few applicative entities and their relationships are embedded within the application logics, typically provide some support to exploratory search, which is, however, specific for a given domain. In this paper, we describe a general-purpose exploratory search framework, i.e., a framework which is neutral to the application logic.
Our contribution consists of the formalization of the exploratory search paradigm over Web data sources, accessed by means of services; extracted information is described by means of an entity-relationship schema, which masks the service implementations. Exploratory interaction is supported by a general-purpose user interface including a set of widgets for data exploration, from big tables to atomic tables, visual diagrams, and geographic maps; the user interaction is translated to queries defined in $ \mathcal {S} \hbox {e} \mathcal {C} \hbox {oQL} $, a SQL-like language and protocol specifically designed for supporting exploratory search over data sources. We illustrate the software architecture of our prototype, which uses the interplay of a query and result management system with an orchestrator, capable of incrementally building queries and of walking through the past navigation history. The distinctive feature of the framework is the ability to extract top solutions, which combine top-ranked entity instances.
We evaluate exploratory search from the end-user perspective in the context of a cognitive model for search, by studying the user's behavior and the effectiveness of exploratory search in terms of quality of results produced by the search process; we also compare the effectiveness of interaction in using our multi-domain search system with the use of various replicas of the system, each acting upon a single domain, and with the use of conventional search engines.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Demartini:2013:LSL, author = "Gianluca Demartini and Djellel Eddine Difallah and Philippe Cudr{\'e}-Mauroux", title = "Large-scale linked data integration using probabilistic reasoning and crowdsourcing", journal = j-VLDB-J, volume = "22", number = "5", pages = "665--687", month = oct, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0324-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Dec 16 16:57:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We tackle the problems of semiautomatically matching linked data sets and of linking large collections of Web pages to linked data. Our system, ZenCrowd, (1) uses a three-stage blocking technique in order to obtain the best possible instance matches while minimizing both computational complexity and latency, and (2) identifies entities from natural language text using state-of-the-art techniques and automatically connects them to the linked open data cloud. First, we use structured inverted indices to quickly find potential candidate results from entities that have been indexed in our system. 
Our system then analyzes the candidate matches and refines them whenever deemed necessary using computationally more expensive queries on a graph database. Finally, we resort to human computation by dynamically generating crowdsourcing tasks in case the algorithmic components fail to come up with convincing results. We integrate all results from the inverted indices, from the graph database and from the crowd using a probabilistic framework in order to make sensible decisions about candidate matches and to identify unreliable human workers. In the following, we give an overview of the architecture of our system and describe in detail our novel three-stage blocking technique and our probabilistic decision framework. We also report on a series of experimental results on a standard data set, showing that our system can achieve a 95 \% average accuracy on instance matching (as compared to the initial 88 \% average accuracy of the purely automatic baseline) while drastically limiting the amount of work performed by the crowd. 
The experimental evaluation of our system on the entity linking task shows an average relative improvement of 14 \% over our best automatic approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sagi:2013:SMP, author = "Tomer Sagi and Avigdor Gal", title = "Schema matching prediction with applications to data source discovery and dynamic ensembling", journal = j-VLDB-J, volume = "22", number = "5", pages = "689--710", month = oct, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0325-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Dec 16 16:57:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Web-scale data integration involves fully automated efforts which lack knowledge of the exact match between data descriptions. In this paper, we introduce schema matching prediction, an assessment mechanism to support schema matchers in the absence of an exact match. Given attribute pair-wise similarity measures, a predictor predicts the success of a matcher in identifying correct correspondences. We present a comprehensive framework in which predictors can be defined, designed, and evaluated. We formally define schema matching evaluation and schema matching prediction using similarity spaces and discuss a set of four desirable properties of predictors, namely correlation, robustness, tunability, and generalization. We present a method for constructing predictors, supporting generalization, and introduce prediction models as means of tuning prediction toward various quality measures. We define the empirical properties of correlation and robustness and provide concrete measures for their evaluation. 
We illustrate the usefulness of schema matching prediction by presenting three use cases: We propose a method for ranking the relevance of deep Web sources with respect to given user needs. We show how predictors can assist in the design of schema matching systems. Finally, we show how prediction can support dynamic weight setting of matchers in an ensemble, thus improving upon current state-of-the-art weight setting methods. An extensive empirical evaluation shows the usefulness of predictors in these use cases and demonstrates the usefulness of prediction models in increasing the performance of schema matching.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lee:2013:HEC, author = "Jongwuk Lee and Hyunsouk Cho and Jin-Woo Park and Young-Rok Cha and Seung-Won Hwang and Zaiqing Nie and Ji-Rong Wen", title = "Hybrid entity clustering using crowds and data", journal = j-VLDB-J, volume = "22", number = "5", pages = "711--726", month = oct, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0328-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Dec 16 16:57:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Query result clustering has attracted considerable attention as a means of providing users with a concise overview of results. However, little research effort has been devoted to organizing the query results for entities which refer to real-world concepts, e.g., people, products, and locations. Entity-level result clustering is more challenging because diverse similarity notions between entities need to be supported in heterogeneous domains, e.g., image resolution is an important feature for cameras, but not for fruits. 
To address this challenge, we propose a hybrid relationship clustering algorithm, called Hydra, using co-occurrence and numeric features. Algorithm Hydra captures diverse user perceptions from co-occurrence and disambiguates different senses using feature-based similarity. In addition, we extend Hydra into $ \mathsf {Hydra}_\mathsf {gData} $ with different sources, i.e., entity types and crowdsourcing. Experimental results show that the proposed algorithms achieve effectiveness and efficiency in real-life and synthetic datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhao:2013:EPG, author = "Xiang Zhao and Chuan Xiao and Xuemin Lin and Wei Wang and Yoshiharu Ishikawa", title = "Efficient processing of graph similarity queries with edit distance constraints", journal = j-VLDB-J, volume = "22", number = "6", pages = "727--752", month = dec, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0306-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:45 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Graphs are widely used to model complicated data semantics in many applications in bioinformatics, chemistry, social networks, pattern recognition, etc. A recent trend is to tolerate noise arising from various sources such as erroneous data entries and find similarity matches. In this paper, we study graph similarity queries with edit distance constraints. Inspired by the $q$-gram idea for string similarity problems, our solution extracts paths from graphs as features for indexing. We establish a lower bound of common features to generate candidates. 
Efficient algorithms are proposed to handle three types of graph similarity queries by exploiting both matching and mismatching features as well as degree information to improve the filtering and verification on candidates. We demonstrate the proposed algorithms significantly outperform existing approaches with extensive experiments on real and synthetic datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gemulla:2013:NUI, author = "Rainer Gemulla and Peter J. Haas and Wolfgang Lehner", title = "Non-uniformity issues and workarounds in bounded-size sampling", journal = j-VLDB-J, volume = "22", number = "6", pages = "753--772", month = dec, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0307-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:45 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A variety of schemes have been proposed in the literature to speed up query processing and analytics by incrementally maintaining a bounded-size uniform sample from a dataset in the presence of a sequence of insertion, deletion, and update transactions. These algorithms vary according to whether the dataset is an ordinary set or a multiset and whether the transaction sequence consists only of insertions or can include deletions and updates. We report on subtle non-uniformity issues that we found in a number of these prior bounded-size sampling schemes, including some of our own. We provide workarounds that can avoid the non-uniformity problem; these workarounds are easy to implement and incur negligible additional cost. 
We also consider the impact of non-uniformity in practice and describe simple statistical tests that can help detect non-uniformity in new algorithms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Whang:2013:JER, author = "Steven Euijong Whang and Hector Garcia-Molina", title = "Joint entity resolution on multiple datasets", journal = j-VLDB-J, volume = "22", number = "6", pages = "773--795", month = dec, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0308-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:45 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Entity resolution (ER) is the problem of identifying which records in a database represent the same entity. Often, records of different types are involved (e.g., authors, publications, institutions, venues), and resolving records of one type can impact the resolution of other types of records. In this paper we propose a flexible, modular resolution framework where existing ER algorithms developed for a given record type can be plugged in and used in concert with other ER algorithms. Our approach also makes it possible to run ER on subsets of similar records at a time, important when the full data are too large to resolve together. We study the scheduling and coordination of the individual ER algorithms, in order to resolve the full dataset, and show the scalability of our approach. 
We also introduce a ``state-based'' training technique where each ER algorithm is trained for the particular execution context (relative to other types of records) where it will be used.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xu:2013:DPH, author = "Jia Xu and Zhenjie Zhang and Xiaokui Xiao and Yin Yang and Ge Yu and Marianne Winslett", title = "Differentially private histogram publication", journal = j-VLDB-J, volume = "22", number = "6", pages = "797--822", month = dec, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0309-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:45 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Differential privacy (DP) is a promising scheme for releasing the results of statistical queries on sensitive data, with strong privacy guarantees against adversaries with arbitrary background knowledge. Existing studies on differential privacy mostly focus on simple aggregations such as counts. This paper investigates the publication of DP-compliant histograms, which is an important analytical tool for showing the distribution of a random variable, e.g., hospital bill size for certain patients. Compared to simple aggregations whose results are purely numerical, a histogram query is inherently more complex, since it must also determine its structure, i.e., the ranges of the bins. As we demonstrate in the paper, a DP-compliant histogram with finer bins may actually lead to significantly lower accuracy than a coarser one, since the former requires stronger perturbations in order to satisfy DP. Moreover, the histogram structure itself may reveal sensitive information, which further complicates the problem. 
Motivated by this, we propose two novel mechanisms, namely NoiseFirst and StructureFirst, for computing DP-compliant histograms. Their main difference lies in the relative order of the noise injection and the histogram structure computation steps. NoiseFirst has the additional benefit that it can improve the accuracy of an already published DP-compliant histogram computed using a naive method. For each of proposed mechanisms, we design algorithms for computing the optimal histogram structure with two different objectives: minimizing the mean square error and the mean absolute error, respectively. Going one step further, we extend both mechanisms to answer arbitrary range queries. Extensive experiments, using several real datasets, confirm that our two proposals output highly accurate query answers and consistently outperform existing competitors.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Fink:2013:AAP, author = "Robert Fink and Jiewen Huang and Dan Olteanu", title = "Anytime approximation in probabilistic databases", journal = j-VLDB-J, volume = "22", number = "6", pages = "823--848", month = dec, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0310-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:45 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This article describes an approximation algorithm for computing the probability of propositional formulas over discrete random variables. It incrementally refines lower and upper bounds on the probability of the formulas until the desired absolute or relative error guarantee is reached. 
This algorithm is used by the SPROUT query engine to approximate the probabilities of results to relational algebra queries on expressive probabilistic databases.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Drosou:2013:YER, author = "Marina Drosou and Evaggelia Pitoura", title = "{YmalDB}: exploring relational databases via result-driven recommendations", journal = j-VLDB-J, volume = "22", number = "6", pages = "849--874", month = dec, year = "2013", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0311-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:45 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The typical user interaction with a database system is through queries. However, many times users do not have a clear understanding of their information needs or the exact content of the database. In this paper, we propose assisting users in database exploration by recommending to them additional items, called Ymal (``You May Also Like'') results, that, although not part of the result of their original query, appear to be highly related to it. Such items are computed based on the most interesting sets of attribute values, called faSets, that appear in the result of the original query. The interestingness of a faSet is defined based on its frequency in the query result and in the database. Database frequency estimations rely on a novel approach of maintaining a set of representative rare faSets. 
We have implemented our approach and report results regarding both its performance and its usefulness.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Johnson:2014:EUC, author = "Ryan Johnson and Ippokratis Pandis and Anastasia Ailamaki", title = "Eliminating unscalable communication in transaction processing", journal = j-VLDB-J, volume = "23", number = "1", pages = "1--23", month = feb, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0312-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:46 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Multicore hardware demands software parallelism. Transaction processing workloads typically exhibit high concurrency, and, thus, provide ample opportunities for parallel execution. Unfortunately, because of the characteristics of the application, transaction processing systems must moderate and coordinate communication between independent agents; since it is notoriously difficult to implement high performing transaction processing systems that incur no communication whatsoever. As a result, transaction processing systems cannot always convert abundant, even embarrassing, request-level parallelism into execution parallelism due to communication bottlenecks. Transaction processing system designers must therefore find ways to achieve scalability while still allowing communication to occur. To this end, we identify three forms of communication in the system---unbounded, fixed, and cooperative---and argue that only the first type poses a fundamental threat to scalability. The other two types tend not impose obstacles to scalability, though they may reduce single-thread performance. 
We argue that proper analysis of communication patterns in any software system is a powerful tool for improving the system's scalability. Then, we present and evaluate under a common framework techniques that attack significant sources of unbounded communication during transaction processing and sketch a solution for those that remain. The solutions we present affect fundamental services of any transaction processing engine, such as locking, logging, physical page accesses, and buffer pool frame accesses. They either reduce such communication through caching, downgrade it to a less-threatening type, or eliminate it completely through system design. We find that the later technique, revisiting the transaction processing architecture, is the most effective. The final design cuts unbounded communication by roughly an order of magnitude compared with the baseline, while exhibiting better scalability on multicore machines.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhou:2014:EQP, author = "Junfeng Zhou and Zhifeng Bao and Wei Wang and Jinjia Zhao and Xiaofeng Meng", title = "Efficient query processing for {XML} keyword queries based on the {IDList} index", journal = j-VLDB-J, volume = "23", number = "1", pages = "25--50", month = feb, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0313-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:46 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Keyword search over XML data has attracted a lot of research efforts in the last decade, where one of the fundamental research problems is how to efficiently answer a given keyword query w.r.t. a certain query semantics. 
We found that the key factor resulting in the inefficiency for existing methods is that they all heavily suffer from the common-ancestor-repetition problem. In this paper, we propose a novel form of inverted list, namely the IDList; the IDList for keyword $k$ consists of ordered nodes that directly or indirectly contain $k$. We then show that finding keyword query results based on the smallest lowest common ancestor and exclusive lowest common ancestor semantics can be reduced to ordered set intersection problem, which has been heavily optimized due to its application in areas such as information retrieval and database systems. We propose several algorithms that exploit set intersection in different directions and with or without using additional indexes. We further propose several algorithms that are based on hash search to simplify the operation of finding common nodes from all involved IDLists. We have conducted an extensive set of experiments using many state-of-the-art algorithms and several large-scale datasets. 
The results demonstrate that our proposed methods outperform existing methods by up to two orders of magnitude in many cases.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Slavov:2014:GBA, author = "Vasil Slavov and Praveen Rao", title = "A gossip-based approach for {Internet}-scale cardinality estimation of {XPath} queries over distributed semistructured data", journal = j-VLDB-J, volume = "23", number = "1", pages = "51--76", month = feb, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0314-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:46 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we address the problem of cardinality estimation of XPath queries over XML data stored in a distributed, Internet-scale environment such as a large-scale, data sharing system designed to foster innovations in biomedical and health informatics. The cardinality estimate of XPath expressions is useful in XQuery optimization, designing IR-style relevance ranking schemes, and statistical hypothesis testing. We present a novel gossip algorithm called XGossip, which given an XPath query estimates the number of XML documents in the network that contain a match for the query. XGossip is designed to be scalable, decentralized, and robust to failures---properties that are desirable in a large-scale distributed system. XGossip employs a novel divide-and-conquer strategy for load balancing and reducing the bandwidth consumption. We conduct theoretical analysis of XGossip in terms of accuracy of cardinality estimation, message complexity, and bandwidth consumption. 
We present a comprehensive performance evaluation of XGossip on Amazon EC2 using a heterogeneous collection of XML documents.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Whang:2014:IER, author = "Steven Euijong Whang and Hector Garcia-Molina", title = "Incremental entity resolution on rules and data", journal = j-VLDB-J, volume = "23", number = "1", pages = "77--102", month = feb, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0315-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:46 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Entity resolution (ER) identifies database records that refer to the same real-world entity. In practice, ER is not a one-time process, but is constantly improved as the data, schema and application are better understood. We first address the problem of keeping the ER result up-to-date when the ER logic or data ``evolve'' frequently. A na{\"\i}ve approach that re-runs ER from scratch may not be tolerable for resolving large datasets. This paper investigates when and how we can instead exploit previous ``materialized'' ER results to save redundant work with evolved logic and data. We introduce algorithm properties that facilitate evolution, and we propose efficient rule and data evolution techniques for three ER models: match-based clustering (records are clustered based on Boolean matching information), distance-based clustering (records are clustered based on relative distances), and pairs ER (the pairs of matching records are identified). 
Using real datasets, we illustrate the cost of materializations and the potential gains of evolution over the na{\"\i}ve approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Beskales:2014:SRC, author = "George Beskales and Ihab F. Ilyas and Lukasz Golab and Artur Galiullin", title = "Sampling from repairs of conditional functional dependency violations", journal = j-VLDB-J, volume = "23", number = "1", pages = "103--128", month = feb, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0316-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:46 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Violations of functional dependencies (FDs) and conditional functional dependencies (CFDs) are common in practice, often indicating deviations from the intended data semantics. These violations arise in many contexts such as data integration and Web data extraction. Resolving these violations is challenging for a variety of reasons, one of them being the exponential number of possible repairs. Most of the previous work has tackled this problem by producing a single repair that is nearly optimal with respect to some metric. In this paper, we propose a novel data cleaning approach that is not limited to finding a single repair, namely sampling from the space of possible repairs. We give several motivating scenarios where sampling from the space of CFD repairs is desirable, we propose a new class of useful repairs, and we present an algorithm that randomly samples from this space in an efficient way. We also show how to restrict the space of repairs based on constraints that reflect the accuracy of different parts of the database. 
We experimentally evaluate our algorithms against previous approaches to show the utility and efficiency of our approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lee:2014:TEM, author = "Jongwuk Lee and Seung-Won Hwang", title = "Toward efficient multidimensional subspace skyline computation", journal = j-VLDB-J, volume = "23", number = "1", pages = "129--145", month = feb, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0317-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:46 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Skyline queries have attracted considerable attention to assist multicriteria analysis of large-scale datasets. In this paper, we focus on multidimensional subspace skyline computation that has been actively studied for two approaches. First, to narrow down a full-space skyline, users may consider multiple subspace skylines reflecting their interest. For this purpose, we tackle the concept of a skycube, which consists of all possible non-empty subspace skylines in a given full space. Second, to understand diverse semantics of subspace skylines, we address skyline groups in which a skyline point (or a set of skyline points) is annotated with decisive subspaces. Our primary contributions are to identify common building blocks of the two approaches and to develop orthogonal optimization principles that benefit both approaches. 
Our experimental results show the efficiency of proposed algorithms by comparing them with state-of-the-art algorithms in both synthetic and real-life datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zellag:2014:CAM, author = "Kamal Zellag and Bettina Kemme", title = "Consistency anomalies in multi-tier architectures: automatic detection and prevention", journal = j-VLDB-J, volume = "23", number = "1", pages = "147--172", month = feb, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0318-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Feb 13 09:58:46 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Modern transaction systems, consisting of an application server tier and a database tier, offer several levels of isolation providing a trade-off between performance and consistency. While it is fairly well known how to identify qualitatively the anomalies that are possible under a certain isolation level, it is much more difficult to detect and quantify such anomalies during run-time of a given application. In this paper, we present a new approach to detect and quantify consistency anomalies for arbitrary multi-tier application running under any isolation levels ensuring at least read committed. In fact, the application can run even under a mixture of isolation levels. Our detection approach can be online or off-line and for each detected anomaly, we identify exactly the transactions and data items involved. Furthermore, we classify the detected anomalies into patterns showing the business methods involved as well as analyzing the types of cycles that occur. 
Our approach can help designers to either choose an isolation level where the anomalies do not occur or to change the transaction design to avoid the anomalies. Furthermore, we provide an option in which the occurrence of anomalies can be automatically reduced during run-time. To test the effectiveness and efficiency of our approach, we have conducted a set of experiments using a wide range of benchmarks.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } %% TO DO: [10-Nov-2016] v23n2 (spring 2014) is STILL missing at ACM Web %% site, but data have been found at the Springer Web site and converted %% to BibTeX, so coverage is complete again. %% Reported to ACM on [18-Sep-2015] via portal-feedback at hq.acm.org. @Article{Ozsoyoglu:2014:SIB, author = "Z. Meral {\"O}zsoyo{\u{g}}lu and U{\u{g}}ur {\c{C}}etintemel and Nilesh Dalvi and Hank Korth and Anthony Tung", title = "Special issue on best papers of {VLDB 2012}", journal = j-VLDB-J, volume = "23", number = "2", pages = "173--174", month = apr, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0356-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 26 17:19:12 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-014-0356-z; http://link.springer.com/content/pdf/10.1007/s00778-014-0356-z.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "http://link.springer.com/journal/778", } @Article{Angel:2014:DSM, author = "Albert Angel and Nick Koudas and Nikos Sarkas and Divesh Srivastava and Michael Svendsen and Srikanta Tirthapura", title = "Dense subgraph maintenance under streaming edge weight updates for real-time story identification", journal = j-VLDB-J, volume = "23", number = 
"2", pages = "175--199", month = apr, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0340-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 26 17:19:12 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-013-0340-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "http://link.springer.com/journal/778", } @Article{Das:2014:EFE, author = "Mahashweta Das and Saravanan Thirumuruganathan and Sihem Amer-Yahia and Gautam Das and Cong Yu", title = "An expressive framework and efficient algorithms for the analysis of collaborative tagging", journal = j-VLDB-J, volume = "23", number = "2", pages = "201--226", month = apr, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0341-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 26 17:19:12 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-013-0341-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "http://link.springer.com/journal/778", } @Article{Cheng:2014:EPH, author = "James Cheng and Zechao Shang and Hong Cheng and Haixun Wang and Jeffrey Xu Yu", title = "Efficient processing of $k$-hop reachability queries", journal = j-VLDB-J, volume = "23", number = "2", pages = "227--252", month = apr, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0346-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 26 17:19:12 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-013-0346-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very 
Large Data Bases", journal-URL = "http://link.springer.com/journal/778", } @Article{Koch:2014:DHO, author = "Christoph Koch and Yanif Ahmad and Oliver Kennedy and Milos Nikolic and Andres N{\"o}tzli and Daniel Lupei and Amir Shaikhha", title = "{DBToaster}: higher-order delta processing for dynamic, frequently fresh views", journal = j-VLDB-J, volume = "23", number = "2", pages = "253--278", month = apr, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0348-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 26 17:19:12 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-013-0348-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "http://link.springer.com/journal/778", } @Article{Bailis:2014:QEC, author = "Peter Bailis and Shivaram Venkataraman and Michael J. Franklin and Joseph M. 
Hellerstein and Ion Stoica", title = "Quantifying eventual consistency with {PBS}", journal = j-VLDB-J, volume = "23", number = "2", pages = "279--302", month = apr, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0330-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 26 17:19:12 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-013-0330-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "http://link.springer.com/journal/778", } @Article{Graefe:2014:TSA, author = "Goetz Graefe and Felix Halim and Stratos Idreos and Harumi Kuno and Stefan Manegold and Bernhard Seeger", title = "Transactional support for adaptive indexing", journal = j-VLDB-J, volume = "23", number = "2", pages = "303--328", month = apr, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0345-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 26 17:19:12 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-013-0345-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "http://link.springer.com/journal/778", } @Article{Zhang:2014:TCE, author = "Ning Zhang and Junichi Tatemura and Jignesh M. 
Patel and Hakan Hacigumus", title = "Toward cost-effective storage provisioning for {DBMSs}", journal = j-VLDB-J, volume = "23", number = "2", pages = "329--354", month = apr, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0334-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 26 17:19:12 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-013-0334-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "http://link.springer.com/journal/778", } @Article{Doulkeridis:2014:SLS, author = "Christos Doulkeridis and Kjetil N{\o}rv{\aa}g", title = "A survey of large-scale analytical query processing in {MapReduce}", journal = j-VLDB-J, volume = "23", number = "3", pages = "355--380", month = jun, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0319-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 29 06:13:52 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Enterprises today acquire vast volumes of data from different sources and leverage this information by means of data analysis to support effective decision-making and provide new functionality and services. The key requirement of data analytics is scalability, simply due to the immense volume of data that need to be extracted, processed, and analyzed in a timely fashion. Arguably the most popular framework for contemporary large-scale data analytics is MapReduce, mainly due to its salient features that include scalability, fault-tolerance, ease of programming, and flexibility. 
However, despite its merits, MapReduce has evident performance limitations in miscellaneous analytical tasks, and this has given rise to a significant body of research that aim at improving its efficiency, while maintaining its desirable properties. This survey aims to review the state of the art in improving the performance of parallel query processing using MapReduce. A set of the most significant weaknesses and limitations of MapReduce is discussed at a high level, along with solving techniques. A taxonomy is presented for categorizing existing research on MapReduce improvements according to the specific problem they target. Based on the proposed taxonomy, a classification of existing research is provided focusing on the optimization objective. Concluding, we outline interesting directions for future parallel data processing systems.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhou:2014:EDT, author = "Xiangmin Zhou and Lei Chen", title = "Event detection over {Twitter} social media streams", journal = j-VLDB-J, volume = "23", number = "3", pages = "381--400", month = jun, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0320-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 29 06:13:52 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In recent years, microblogs have become an important source for reporting real-world events. A real-world occurrence reported in microblogs is also called a social event. Social events may hold critical materials that describe the situations during a crisis. 
In real applications, such as crisis management and decision making, monitoring the critical events over social streams will enable watch officers to analyze a whole situation that is a composite event, and make the right decision based on the detailed contexts such as what is happening, where an event is happening, and who are involved. Although there has been significant research effort on detecting a target event in social networks based on a single source, in crisis, we often want to analyze the composite events contributed by different social users. So far, the problem of integrating ambiguous views from different users is not well investigated. To address this issue, we propose a novel framework to detect composite social events over streams, which fully exploits the information of social data over multiple dimensions. Specifically, we first propose a graphical model called location-time constrained topic (LTT) to capture the content, time, and location of social messages. Using LTT, a social message is represented as a probability distribution over a set of topics by inference, and the similarity between two messages is measured by the distance between their distributions. Then, the events are identified by conducting efficient similarity joins over social media streams. To accelerate the similarity join, we also propose a variable dimensional extendible hash over social streams. We have conducted extensive experiments to prove the high effectiveness and efficiency of the proposed approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hung:2014:QTB, author = "Ho Hoang Hung and Sourav S. 
Bhowmick and Ba Quan Truong and Byron Choi and Shuigeng Zhou", title = "{QUBLE}: towards blending interactive visual subgraph search queries on large networks", journal = j-VLDB-J, volume = "23", number = "3", pages = "401--426", month = jun, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0322-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 29 06:13:52 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In a previous paper, we laid out the vision of a novel graph query processing paradigm where instead of processing a visual query graph after its construction, it interleaves visual query formulation and processing by exploiting the latency offered by the GUI to filter irrelevant matches and prefetch partial query results [8]. Our recent attempts at implementing this vision [8, 9] show significant improvement in system response time (SRT) for subgraph queries. However, these efforts are designed specifically for graph databases containing a large collection of small or medium-sized graphs. In this paper, we propose a novel algorithm called QUBLE (QUery Blender for Large nEtworks) to realize this visual subgraph querying paradigm on very large networks (e.g., protein interaction networks, social networks). First, it decomposes a large network into a set of graphlets and supergraphlets using a minimum cut-based graph partitioning technique. Next, it mines approximate frequent and small infrequent fragments (SIFs) from them and identifies their occurrences in these graphlets and supergraphlets. Then, the indexing framework of [9] is enhanced so that the mined fragments can be exploited to index graphlets for efficient blending of visual subgraph query formulation and query processing.
Extensive experiments on large networks demonstrate effectiveness of QUBLE.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Teodoro:2014:ASS, author = "George Teodoro and Eduardo Valle and Nathan Mariano and Ricardo Torres and Wagner {Meira, Jr.} and Joel H. Saltz", title = "Approximate similarity search for online multimedia services on distributed {CPU--GPU} platforms", journal = j-VLDB-J, volume = "23", number = "3", pages = "427--448", month = jun, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0329-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 29 06:13:52 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Similarity search in high-dimensional spaces is a pivotal operation for several database applications, including online content-based multimedia services. With the increasing popularity of multimedia applications, these services are facing new challenges regarding (1) the very large and growing volumes of data to be indexed/searched and (2) the necessity of reducing the response times as observed by end-users. In addition, the nature of the interactions between users and online services creates fluctuating query request rates throughout execution, which requires a similarity search engine to adapt to better use the computation platform and minimize response times. In this work, we address these challenges with Hypercurves, a flexible framework for answering approximate k-nearest neighbor (kNN) queries for very large multimedia databases. Hypercurves executes in hybrid CPU--GPU environments and is able to attain massive query-processing rates through the cooperative use of these devices.
Hypercurves also changes its CPU--GPU task partitioning dynamically according to the observed load, aiming for optimal response times. In our empirical evaluation, dynamic task partitioning reduced query response times by approximately 50\% compared to the best static task partition. Due to a probabilistic proof of equivalence to the sequential kNN algorithm, the CPU--GPU execution of Hypercurves in distributed (multi-node) environments can be aggressively optimized, attaining superlinear scalability while still guaranteeing, with high probability, results at least as good as those from the sequential algorithm.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Shang:2014:PTM, author = "Shuo Shang and Ruogu Ding and Kai Zheng and Christian S. Jensen and Panos Kalnis and Xiaofang Zhou", title = "Personalized trajectory matching in spatial networks", journal = j-VLDB-J, volume = "23", number = "3", pages = "449--468", month = jun, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0331-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 29 06:13:52 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the increasing availability of moving-object tracking data, trajectory search and matching is increasingly important. We propose and investigate a novel problem called personalized trajectory matching (PTM). In contrast to conventional trajectory similarity search by spatial distance only, PTM takes into account the significance of each sample point in a query trajectory. A PTM query takes a trajectory with user-specified weights for each sample point in the trajectory as its argument. It returns the trajectory in an argument data set with the highest similarity to the query trajectory.
We believe that this type of query may bring significant benefits to users in many popular applications such as route planning, carpooling, friend recommendation, traffic analysis, urban computing, and location-based services in general. PTM query processing faces two challenges: how to prune the search space during the query processing and how to schedule multiple so-called expansion centers effectively. To address these challenges, a novel two-phase search algorithm is proposed that carefully selects a set of expansion centers from the query trajectory and exploits upper and lower bounds to prune the search space in the spatial and temporal domains. An efficiency study reveals that the algorithm explores the minimum search space in both domains. Second, a heuristic search strategy based on priority ranking is developed to schedule the multiple expansion centers, which can further prune the search space and enhance the query efficiency. The performance of the PTM query is studied in extensive experiments based on real and synthetic trajectory data sets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Richter:2014:TZO, author = "Stefan Richter and Jorge-Arnulfo Quian{\'e}-Ruiz and Stefan Schuh and Jens Dittrich", title = "Towards zero-overhead static and adaptive indexing in {Hadoop}", journal = j-VLDB-J, volume = "23", number = "3", pages = "469--494", month = jun, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0332-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 29 06:13:52 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Hadoop MapReduce has evolved to an important industry standard for massive parallel data processing and has become widely adopted for a variety of use-cases. 
Recent works have shown that indexes can improve the performance of selective MapReduce jobs dramatically. However, one major weakness of existing approaches is high index creation costs. We present HAIL (Hadoop Aggressive Indexing Library), a novel indexing approach for HDFS and Hadoop MapReduce. HAIL creates different clustered indexes over terabytes of data with minimal, often invisible costs, and it dramatically improves runtimes of several classes of MapReduce jobs. HAIL features two different indexing pipelines, static indexing and adaptive indexing. HAIL static indexing efficiently indexes datasets while uploading them to HDFS. Thereby, HAIL leverages the default replication of Hadoop and enhances it with logical replication. This allows HAIL to create multiple clustered indexes for a dataset, e.g., one for each physical replica. Still, in terms of upload time, HAIL matches or even improves over the performance of standard HDFS. Additionally, HAIL adaptive indexing allows for automatic, incremental indexing at job runtime with minimal runtime overhead. For example, HAIL adaptive indexing can completely index a dataset as byproduct of only four MapReduce jobs while incurring an overhead as low as 11\% for the very first of those job only. In our experiments, we show that HAIL improves job runtimes by up to $ 68 \times $ over Hadoop. This article is an extended version of the VLDB 2012 paper (Dittrich et al. 
in PVLDB 5(11):1591--1602, 2012).", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Meier:2014:BR, author = "Michael Meier", title = "The backchase revisited", journal = j-VLDB-J, volume = "23", number = "3", pages = "495--516", month = jun, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0333-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 29 06:13:52 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Semantic query optimization is the process of finding equivalent rewritings of an input query given constraints that hold in a database instance. In this paper, we report about a Chase \& Backchase (C\&B) algorithm strategy that generalizes and improves on well-known methods in the field. The implementation of our approach, the Pegasus system, outperforms existing C\&B systems an average by two orders of magnitude.
This gain in performance is due to a combination of novel methods that lower the complexity in practical situations significantly.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gedik:2014:PFS, author = "Bugra Gedik", title = "Partitioning functions for stateful data parallelism in stream processing", journal = j-VLDB-J, volume = "23", number = "4", pages = "517--539", month = aug, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0335-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 16 17:57:07 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we study partitioning functions for stream processing systems that employ stateful data parallelism to improve application throughput. In particular, we develop partitioning functions that are effective under workloads where the domain of the partitioning key is large and its value distribution is skewed. We define various desirable properties for partitioning functions, ranging from balance properties such as memory, processing, and communication balance, structural properties such as compactness and fast lookup, and adaptation properties such as fast computation and minimal migration. We introduce a partitioning function structure that is compact and develop several associated heuristic construction techniques that exhibit good balance and low migration cost under skewed workloads. 
We provide experimental results that compare our partitioning functions to more traditional approaches such as uniform and consistent hashing, under different workload and application characteristics, and show superior performance.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Koh:2014:FKM, author = "Jia-Ling Koh and Chen-Yi Lin and Arbee L. Chen", title = "Finding $k$ most favorite products based on reverse top-$t$ queries", journal = j-VLDB-J, volume = "23", number = "4", pages = "541--564", month = aug, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0336-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 16 17:57:07 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A reverse top-$t$ query for a product returns a set of customers, named potential customers, who regard the product as one of their top-$t$ favorites. Given a set of customers with different preferences on the features of the products, we want to select at most $k$ products from a pool of candidate products such that their total number of potential customers is maximized. Two versions of the problem are defined according to whether the competitive existing products are given. For solving this NP-hard problem, we first propose an incremental greedy approach to find an approximate solution of the problem with quality guaranteed. For further speeding up this basic greedy approach, we exploit several properties of the top-$t$ queries and skyline queries to reduce the solution space of the problem. In addition, an upper bound of the potential customers is estimated to reduce the cost of computing the reverse top-$t$ queries for the candidate products.
Finally, when the candidate products are formed from multiple component tables, we propose a strategy to reduce the number of the accessed tuples in the component tables such that only the tuples that are possibly components of the top-$t$ favorites of the customers need to be accessed. By applying these pruning strategies, we propose another faster greedy approach. The experiment results demonstrate that the proposed pruning strategies work very well and make the faster greedy algorithms for both versions of the problem achieve excellent performance on both efficiency and memory utilization.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zou:2014:GGB, author = "Lei Zou and M. Tamer {\"O}zsu and Lei Chen and Xuchuan Shen and Ruizhe Huang and Dongyan Zhao", title = "{gStore}: a graph-based {SPARQL} query engine", journal = j-VLDB-J, volume = "23", number = "4", pages = "565--590", month = aug, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0337-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 16 17:57:07 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We address efficient processing of SPARQL queries over RDF datasets. The proposed techniques, incorporated into the gStore system, handle, in a uniform and scalable manner, SPARQL queries with wildcards and aggregate operators over dynamic RDF datasets. Our approach is graph based. We store RDF data as a large graph and also represent a SPARQL query as a query graph. Thus, the query answering problem is converted into a subgraph matching problem. To achieve efficient and scalable query processing, we develop an index, together with effective pruning rules and efficient search algorithms.
We propose techniques that use this infrastructure to answer aggregation queries. We also propose an effective maintenance algorithm to handle online updates over RDF repositories. Extensive experiments confirm the efficiency and effectiveness of our solutions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tao:2014:ILW, author = "Yufei Tao and Yi Yang and Xiaocheng Hu and Cheng Sheng and Shuigeng Zhou", title = "Instance-level worst-case query bounds on {R}-trees", journal = j-VLDB-J, volume = "23", number = "4", pages = "591--607", month = aug, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0339-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 16 17:57:07 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Even with its significant impacts on the database area, the R-tree is often criticized by its lack of good worst-case guarantees. For example, in range search (where we want to report all the data points in a query rectangle), it is known that on adversely designed datasets and queries, an R-tree can be as slow as a sequential scan that simply reads all the data points. Nevertheless, R-trees work so well on real data that they have been widely implemented in commercial systems. This stark contrast has caused long-term controversy between practitioners and theoreticians as to whether this structure deserves its fame. This paper provides theoretical evidence that, somewhat surprisingly, R-trees are efficient in the worst case for range search on many real datasets. Given any integer $K$, we explain how to obtain an upper bound on the cost of answering all (i.e., infinitely many) range queries retrieving at most $K$ objects.
On practical data, the upper bound is only a fraction of the overhead of sequential scan (unless, apparently, $K$ is at the same order as the dataset size). Our upper bounds are tight up to a constant factor, namely they cannot be lowered by more than $ O(1) $ times while still capturing the most expensive queries. Our upper bounds can be calculated in constant time by remembering only three integers. These integers, in turn, are generated from only the leaf MBRs of an R-tree, but not the leaf nodes themselves. In practice, the internal nodes are often buffered in memory, so that the integers aforementioned can be efficiently maintained along with the data updates and made available to a query optimizer at any time. Furthermore, our analytical framework introduces instance-level query bound as a new technique for evaluating the efficiency of heuristic structures in a theory-flavored manner (previously, experimentation was the dominant assessment method).", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Cicek:2014:ELD, author = "A. Ercument Cicek and Mehmet Ercan Nergiz and Yucel Saygin", title = "Ensuring location diversity in privacy-preserving spatio-temporal data publishing", journal = j-VLDB-J, volume = "23", number = "4", pages = "609--625", month = aug, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0342-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 16 17:57:07 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The rise of mobile technologies in the last decade has led to vast amounts of location information generated by individuals.
From the knowledge discovery point of view, these data are quite valuable, but the inherent personal information in the data raises privacy concerns. There exists many algorithms in the literature to satisfy the privacy requirements of individuals, by generalizing, perturbing, and suppressing their data. Current techniques that try to ensure a level of indistinguishability between trajectories in a dataset are direct applications of $k$-anonymity, thus suffer from the shortcomings of $k$-anonymity such as the lack of diversity in sensitive regions. Moreover, these techniques fail to incorporate some common background knowledge, an adversary might have such as the underlying map, the traffic density, and the anonymization algorithm itself. We propose a new privacy metric $p$-confidentiality that ensures location diversity by bounding the probability of a user visiting a sensitive location with the $p$ input parameter. We perform our probabilistic analysis based on the background knowledge of the adversary. Instead of grouping the trajectories, we anonymize the underlying map, that is, we group nodes (points of interest) to create obfuscation areas around sensitive locations. The groups are formed in such a way that the parts of trajectories entering the groups, coupled with the adversary background, do not increase the adversary's belief in violating the $p$-confidentiality. We then use the map anonymization as a model to anonymize the trajectories. We prove that our algorithm is resistant to reverse-engineering attacks when the statistics required for map anonymization is publicly available.
We empirically evaluate the performance of our algorithm and show that location diversity can be satisfied effectively.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Unterbrunner:2014:HAE, author = "Philipp Unterbrunner and Gustavo Alonso and Donald Kossmann", title = "High availability, elasticity, and strong consistency for massively parallel scans over relational data", journal = j-VLDB-J, volume = "23", number = "4", pages = "627--652", month = aug, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0343-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 16 17:57:07 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "An elastic and highly available data store is a key component of many cloud applications. Existing data stores with strong consistency guarantees are designed and optimized for small updates, key-value access, and (if supported) small range queries over a predefined key column. This raises performance and availability problems for applications which inherently require large updates, non-key access, and large range queries. This paper presents a solution to these problems: Crescando/RB; a distributed, scan-based, main memory, relational data store (single table) with robust performance and high availability. The system addresses a real, large-scale industry use case: the Amadeus travel management system. This paper focuses on the distribution layer of Crescando/RB, the problem and theory behind it, the rationale underlying key design decisions, and the novel multicast protocol and replication framework it is composed of. 
Highlighting the key features of the distribution layer, we present experimental results showing that even under permanent node failures and large-scale data repartitioning, Crescando/RB remains fully available and capable of sustaining a heavy query and update load.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chen:2014:CND, author = "Rui Chen and Benjamin C. Fung and Philip S. Yu and Bipin C. Desai", title = "Correlated network data publication via differential privacy", journal = j-VLDB-J, volume = "23", number = "4", pages = "653--676", month = aug, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0344-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Jul 16 17:57:07 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the increasing prevalence of information networks, research on privacy-preserving network data publishing has received substantial attention recently. There are two streams of relevant research, targeting different privacy requirements. A large body of existing works focus on preventing node re-identification against adversaries with structural background knowledge, while some other studies aim to thwart edge disclosure. In general, the line of research on preventing edge disclosure is less fruitful, largely due to lack of a formal privacy model. The recent emergence of differential privacy has shown great promise for rigorous prevention of edge disclosure. Yet recent research indicates that differential privacy is vulnerable to data correlation, which hinders its application to network data that may be inherently correlated. 
In this paper, we show that differential privacy could be tuned to provide provable privacy guarantees even in the correlated setting by introducing an extra parameter, which measures the extent of correlation. We subsequently provide a holistic solution for non-interactive network data publication. First, we generate a private vertex labeling for a given network dataset to make the corresponding adjacency matrix form dense clusters. Next, we adaptively identify dense regions of the adjacency matrix by a data-dependent partitioning process. Finally, we reconstruct a noisy adjacency matrix by a novel use of the exponential mechanism. To our best knowledge, this is the first work providing a practical solution for publishing real-life network data via differential privacy. Extensive experiments demonstrate that our approach performs well on different types of real-life network datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xiang:2014:AED, author = "Yang Xiang", title = "Answering exact distance queries on real-world graphs with bounded performance guarantees", journal = j-VLDB-J, volume = "23", number = "5", pages = "677--695", month = oct, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0338-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 24 08:05:09 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The ability to efficiently obtain exact distance information from both directed and undirected graphs is desired by many real-world applications. In this work, we unified the query indexing efforts on directed and undirected graphs into one by proposing the TreeMap approach. 
Our approach has very tight bounds on query time, index size, and construction time for answering queries on both directed and undirected graphs. The query time complexity is close to constant for graphs with a small width of tree decomposition, and the index construction can be completed without materializing the distance matrix or other high-cost operations. In the empirical study, we demonstrated that the TreeMap approach in general performs much better than competitive methods in indexing real graphs for answering exact distance queries.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yao:2014:DMO, author = "Bin Yao and Xiaokui Xiao and Feifei Li and Yifan Wu", title = "Dynamic monitoring of optimal locations in road network databases", journal = j-VLDB-J, volume = "23", number = "5", pages = "697--720", month = oct, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0347-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 24 08:05:09 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Optimal location (OL) queries are a type of spatial queries that are particularly useful for the strategic planning of resources. Given a set of existing facilities and a set of clients, an OL query asks for a location to build a new facility that optimizes a certain cost metric (defined based on the distances between the clients and the facilities). Several techniques have been proposed to address OL queries, assuming that all clients and facilities reside in an $ L_p $ space. In practice, however, movements between spatial locations are usually confined by the underlying road network, and hence, the actual distance between two locations can differ significantly from their $ L_p $ distance. 
Motivated by the deficiency of the existing techniques, this paper presents a comprehensive study on OL queries in road networks. We propose a unified framework that addresses three variants of OL queries that find important applications in practice, and we instantiate the framework with several novel query processing algorithms. We further extend our framework to efficiently monitor the OLs when locations for facilities and/or clients have been updated. Our dynamic update methods lead to efficient answering of continuous optimal location queries. We demonstrate the efficiency of our solutions through extensive experiments with large real data.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tran:2014:QRE, author = "Quoc Trung Tran and Chee-Yong Chan and Srinivasan Parthasarathy", title = "Query reverse engineering", journal = j-VLDB-J, volume = "23", number = "5", pages = "721--746", month = oct, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0349-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 24 08:05:09 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we introduce a new problem termed query reverse engineering (QRE). Given a database $D$ and a result table $T$ --- the output of some known or unknown query $Q$ on $D$ --- the goal of QRE is to reverse-engineer a query $ Q'$ such that the output of query $ Q'$ on database $D$ (denoted by $ Q'(D)$) is equal to $T$ (i.e., $ Q(D)$). The QRE problem has useful applications in database usability, data analysis, and data security. 
In this work, we propose a data-driven approach, TALOS for {\bf T}ree-based classifier with {\bf A}t {\bf L}east {\bf O}ne {\bf S}emantics, that is based on a novel dynamic data classification formulation and extend the approach to efficiently support the three key dimensions of the QRE problem: whether the input query is known\slash unknown, supporting different query fragments, and supporting multiple database versions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Martinenghi:2014:TBR, author = "Davide Martinenghi and Riccardo Torlone", title = "Taxonomy-based relaxation of query answering in relational databases", journal = j-VLDB-J, volume = "23", number = "5", pages = "747--769", month = oct, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-013-0350-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 24 08:05:09 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Traditional information search in which queries are posed against a known and rigid schema over a structured database is shifting toward a Web scenario in which exposed schemas are vague or absent and data come from heterogeneous sources. In this framework, query answering cannot be precise and needs to be relaxed, with the goal of matching user requests with accessible data. In this paper, we propose a logical model and a class of abstract query languages as a foundation for querying relational data sets with vague schemas. Our approach relies on the availability of taxonomies, that is, simple classifications of terms arranged in a hierarchical structure. The model is a natural extension of the relational model in which data domains are organized in hierarchies, according to different levels of generalization between terms. 
We first propose a conservative extension of the relational algebra for this model in which special operators allow the specification of relaxed queries over vaguely structured information. We also study equivalence and rewriting properties of the algebra that can be used for query optimization. We then illustrate a logic-based query language that can provide a basis for expressing relaxed queries in a declarative way. We finally investigate the expressive power of the proposed query languages and the independence of the taxonomy in this context.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Soria-Comas:2014:EDU, author = "Jordi Soria-Comas and Josep Domingo-Ferrer and David S{\'a}nchez and Sergio Mart{\'\i}nez", title = "Enhancing data utility in differential privacy via microaggregation-based $k$-anonymity", journal = j-VLDB-J, volume = "23", number = "5", pages = "771--794", month = oct, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0351-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 24 08:05:09 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "It is not uncommon in the data anonymization literature to oppose the ``old'' $k$-anonymity model to the ``new'' differential privacy model, which offers more robust privacy guarantees. Yet, it is often disregarded that the utility of the anonymized results provided by differential privacy is quite limited, due to the amount of noise that needs to be added to the output, or because utility can only be guaranteed for a restricted type of queries. This is in contrast with $k$-anonymity mechanisms, which make no assumptions on the uses of anonymized data while focusing on preserving data utility from a general perspective. 
In this paper, we show that a synergy between differential privacy and $k$-anonymity can be found: $k$-anonymity can help improving the utility of differentially private responses to arbitrary queries. We devote special attention to the utility improvement of differentially private published data sets. Specifically, we show that the amount of noise required to fulfill $ \varepsilon $-differential privacy can be reduced if noise is added to a $k$-anonymous version of the data set, where $k$-anonymity is reached through a specially designed microaggregation of all attributes. As a result of noise reduction, the general analytical utility of the anonymized output is increased. The theoretical benefits of our proposal are illustrated in a practical setting with an empirical evaluation on three data sets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Magnani:2014:TBP, author = "Matteo Magnani and Ira Assent and Michael L. Mortensen", title = "Taking the {Big Picture}: representative skylines based on significance and diversity", journal = j-VLDB-J, volume = "23", number = "5", pages = "795--815", month = oct, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0352-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 24 08:05:09 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The skyline is a popular operator to extract records from a database when a record scoring function is not available. However, the result of a skyline query can be very large. The problem addressed in this paper is the automatic selection of a small number ($k$) of representative skyline records. Existing approaches have only focused on partial aspects of this problem. 
Some try to identify sets of diverse records giving an overall approximation of the skyline. These techniques, however, are sensitive to the scaling of attributes or to the insertion of non-skyline records into the database. Others exploit some knowledge of the record scoring function to identify the most significant record, but not sets of records representative of the whole skyline. In this paper, we introduce a novel approach taking both the significance of all the records and their diversity into account, adapting to available knowledge of the scoring function, but also working under complete ignorance. We show the intractability of the problem and present approximate algorithms. We experimentally show that our approach is efficient, scalable and that it improves existing works in terms of the significance and diversity of the results.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sidlauskas:2014:PEM, author = "Darius {\v{S}}idlauskas and Simonas {\v{S}}altenis and Christian S. Jensen", title = "Processing of extreme moving-object update and query workloads in main memory", journal = j-VLDB-J, volume = "23", number = "5", pages = "817--841", month = oct, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0353-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Sep 24 08:05:09 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The efficient processing of workloads that interleave moving-object updates and queries is challenging. In addition to the conflicting needs for update-efficient versus query-efficient data structures, the increasing parallel capabilities of multi-core processors yield challenges. 
To prevent concurrency anomalies and to ensure correct system behavior, conflicting update and query operations must be serialized. In this setting, it is a key concern to avoid that operations are blocked, which leaves processing cores idle. To enable efficient processing, we first examine concurrency degrees from traditional transaction processing in the context of our target domain and propose new semantics that enable a high degree of parallelism and ensure up-to-date query results. We define the new semantics for range and $k$-nearest neighbor queries. Then, we present a main-memory indexing technique called parallel grid that implements the proposed semantics as well as two other variants supporting different semantics. This enables us to quantify the effects that different degrees of consistency have on performance. We also present an alternative time-partitioning approach. Empirical studies with the above and three existing proposals conducted on modern processors show that our proposals scale near-linearly with the number of hardware threads and thus are able to benefit from increasing on-chip parallelism.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Aboulnaga:2014:SSD, author = "Ashraf Aboulnaga and Beng Chin Ooi and Patrick Valduriez", title = "Special section on data-intensive cloud infrastructure", journal = j-VLDB-J, volume = "23", number = "6", pages = "843--843", month = dec, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0371-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Nov 24 15:31:08 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; 
https://link.springer.com/journal/778", } @Article{Kumar:2014:SWA, author = "K. Ashwin Kumar and Abdul Quamar and Amol Deshpande and Samir Khuller", title = "{SWORD}: workload-aware data placement and replica selection for cloud data management systems", journal = j-VLDB-J, volume = "23", number = "6", pages = "845--870", month = dec, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0362-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Nov 24 15:31:08 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Cloud computing is increasingly being seen as a way to reduce infrastructure costs and add elasticity, and is being used by a wide range of organizations. Cloud data management systems today need to serve a range of different workloads, from analytical read-heavy workloads to transactional (OLTP) workloads. For both the service providers and the users, it is critical to minimize the consumption of resources like CPU, memory, communication bandwidth, and energy, without compromising on service-level agreements if any. In this article, we develop a workload-aware data placement and replication approach, called SWORD, for minimizing resource consumption in such an environment. Specifically, we monitor and model the expected workload as a hypergraph and develop partitioning techniques that minimize the average query span, i.e., the average number of machines involved in the execution of a query or a transaction. We empirically justify the use of query span as the metric to optimize, for both analytical and transactional workloads, and develop a series of replication and data placement algorithms by drawing connections to several well-studied graph theoretic concepts. We introduce a suite of novel techniques to achieve high scalability by reducing the overhead of partitioning and query routing. 
To deal with workload changes, we propose an incremental repartitioning technique that modifies data placement in small steps without resorting to complete repartitioning. We propose the use of fine-grained quorums defined at the level of groups of data items to control the cost of distributed updates, improve throughput, and adapt to different workloads. We empirically illustrate the benefits of our approach through a comprehensive experimental evaluation for two classes of workloads. For analytical read-only workloads, we show that our techniques result in significant reduction in total resource consumption. For OLTP workloads, we show that our approach improves transaction latencies and overall throughput by minimizing the number of distributed transactions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sahli:2014:ASP, author = "Majed Sahli and Essam Mansour and Panos Kalnis", title = "{ACME}: a scalable parallel system for extracting frequent patterns from a very long sequence", journal = j-VLDB-J, volume = "23", number = "6", pages = "871--893", month = dec, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0370-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Nov 24 15:31:08 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Modern applications, including bioinformatics, time series, and web log analysis, require the extraction of frequent patterns, called motifs, from one very long (i.e., several gigabytes) sequence. Existing approaches are either heuristics that are error-prone, or exact (also called combinatorial) methods that are extremely slow, therefore, applicable only to very small sequences (i.e., in the order of megabytes). 
This paper presents ACME, a combinatorial approach that scales to gigabyte-long sequences and is the first to support supermaximal motifs. ACME is a versatile parallel system that can be deployed on desktop multi-core systems, or on thousands of CPUs in the cloud. However, merely using more compute nodes does not guarantee efficiency, because of the related overheads. To this end, ACME introduces an automatic tuning mechanism that suggests the appropriate number of CPUs to utilize, in order to meet the user constraints in terms of run time, while minimizing the financial cost of cloud resources. Our experiments show that, compared to the state of the art, ACME supports three orders of magnitude longer sequences (e.g., DNA for the entire human genome); handles large alphabets (e.g., English alphabet for Wikipedia); scales out to 16,384 CPUs on a supercomputer; and supports elastic deployment in the cloud.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lo:2014:MGD, author = "Eric Lo and Nick Cheng and Wilfred W. Lin and Wing-Kai Hon and Byron Choi", title = "{MyBenchmark}: generating databases for query workloads", journal = j-VLDB-J, volume = "23", number = "6", pages = "895--913", month = dec, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0354-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Nov 24 15:31:08 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "To evaluate the performance of database applications and database management systems (DBMSs), we usually execute workloads of queries on generated databases of different sizes and then benchmark various measures such as respond time and throughput. 
This paper introduces MyBenchmark, a parallel data generation tool that takes a set of queries as input and generates database instances. Users of MyBenchmark can control the characteristics of the generated data as well as the characteristics of the resulting workload. Applications of MyBenchmark include DBMS testing, database application testing, and application-driven benchmarking. In this paper, we present the architecture and the implementation algorithms of MyBenchmark. Experimental results show that MyBenchmark is able to generate workload-aware databases for a variety of workloads including query workloads extracted from TPC-C, TPC-E, TPC-H, and TPC-W benchmarks.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xie:2014:MEB, author = "Qing Xie and Chaoyi Pang and Xiaofang Zhou and Xiangliang Zhang and Ke Deng", title = "Maximum error-bounded {Piecewise Linear Representation} for online stream approximation", journal = j-VLDB-J, volume = "23", number = "6", pages = "915--937", month = dec, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0355-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Nov 24 15:31:08 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Given a time series data stream, the generation of error-bounded Piecewise Linear Representation (error-bounded PLR) is to construct a number of consecutive line segments to approximate the stream, such that the approximation error does not exceed a prescribed error bound. In this work, we consider the error bound in $ L_\infty $ norm as approximation criterion, which constrains the approximation error on each corresponding data point, and aim on designing algorithms to generate the minimal number of segments.
In the literature, the optimal approximation algorithms are effectively designed based on transformed space other than time-value space, while desirable optimal solutions based on original time domain (i.e., time-value space) are still lacked. In this article, we proposed two linear-time algorithms to construct error-bounded PLR for data stream based on time domain, which are named OptimalPLR and GreedyPLR, respectively. The OptimalPLR is an optimal algorithm that generates minimal number of line segments for the stream approximation, and the GreedyPLR is an alternative solution for the requirements of high efficiency and resource-constrained environment. In order to evaluate the superiority of OptimalPLR, we theoretically analyzed and compared OptimalPLR with the state-of-art optimal solution in transformed space, which also achieves linear complexity. We successfully proved the theoretical equivalence between time-value space and such transformed space, and also discovered the superiority of OptimalPLR on processing efficiency in practice. The extensive results of empirical evaluation support and demonstrate the effectiveness and efficiency of our proposed algorithms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Alexandrov:2014:SPB, author = "Alexander Alexandrov and Rico Bergmann and Stephan Ewen and Johann-Christoph Freytag and Fabian Hueske and Arvid Heise and Odej Kao and Marcus Leich and Ulf Leser and Volker Markl and Felix Naumann and Mathias Peters and Astrid Rheinl{\"a}nder and Matthias J. 
Sax and Sebastian Schelter and Mareike H{\"o}ger and Kostas Tzoumas and Daniel Warneke", title = "The {Stratosphere} platform for big data analytics", journal = j-VLDB-J, volume = "23", number = "6", pages = "939--964", month = dec, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0357-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Nov 24 15:31:08 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We present Stratosphere, an open-source software stack for parallel data analysis. Stratosphere brings together a unique set of features that allow the expressive, easy, and efficient programming of analytical applications at very large scale. Stratosphere's features include ``in situ'' data processing, a declarative query language, treatment of user-defined functions as first-class citizens, automatic program parallelization and optimization, support for iterative programs, and a scalable and efficient execution engine. Stratosphere covers a variety of ``Big Data'' use cases, such as data warehousing, information extraction and integration, data cleansing, graph analysis, and statistical analysis applications. In this paper, we present the overall system architecture design decisions, introduce Stratosphere through example queries, and then dive into the internal workings of the system's components that relate to extensibility, programming model, optimization, and query execution. We experimentally compare Stratosphere against popular open-source alternatives, and we conclude with a research outlook for the next years.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ward:2014:RTC, author = "Phillip G. 
Ward and Zhen He and Rui Zhang and Jianzhong Qi", title = "Real-time continuous intersection joins over large sets of moving objects using graphic processing units", journal = j-VLDB-J, volume = "23", number = "6", pages = "965--985", month = dec, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0358-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Nov 24 15:31:08 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The Multiple Time Bucket Join (MTB-join) algorithm is the state of the art for processing the continuous intersection join (CI-join) query over moving objects. It considerably outperforms alternatives, but still falls short of real-time application performance requirements for large sets of moving objects. In this paper, we achieve real-time performance for the CI-join query over large sets of moving objects by exploiting the computational power of commodity graphics processing units (GPUs). We first analyze how the main characteristics of the MTB-join algorithm make it ill suited to GPUs and identify key challenges in designing efficient GPU-based algorithms for the query. We then address these challenges by developing the multi-layered grid join (MLG-join) algorithm which has the following key features: (i) memory locality friendly indexing, (ii) no dynamic memory allocation, (iii) in-place object updates, (iv) lock-free concurrent updates, and (v) massive parallelism. These features unleash the full potential of the memory bandwidth and parallel processing of GPUs. Furthermore, we conduct a theoretical analysis which can predict the pruning power of the MLG-join algorithm given certain parameter values used in the algorithm. This allows us to select optimal parameter values. Through extensive experimental results, we show that our analysis accurately models the MLG-join algorithm's sensitivity to parameter values. 
The proposed MLG-join algorithm outperforms the MTB-join algorithm, and a GPU-based nested-loops join algorithm, by up to two orders of magnitude, and achieves real-time performance for CI-join queries on large sets of moving objects.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Binnig:2014:DSI, author = "Carsten Binnig and Stefan Hildenbrand and Franz F{\"a}rber and Donald Kossmann and Juchang Lee and Norman May", title = "Distributed snapshot isolation: global transactions pay globally, local transactions pay locally", journal = j-VLDB-J, volume = "23", number = "6", pages = "987--1011", month = dec, year = "2014", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0359-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Nov 24 15:31:08 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Modern database systems employ Snapshot Isolation to implement concurrency control and isolation because it promises superior query performance compared to lock-based alternatives. Furthermore, Snapshot Isolation never blocks readers, which is an important property for modern information systems, which have mixed workloads of heavy OLAP queries and short update transactions. This paper revisits the problem of implementing Snapshot Isolation in a distributed database system and makes three important contributions. First, a complete definition of Distributed Snapshot Isolation is given, thereby extending existing definitions from the literature. Based on this definition, a set of criteria is proposed to efficiently implement Snapshot Isolation in a distributed system. Second, the design space of alternative methods to implement Distributed Snapshot Isolation is presented based on this set of criteria. 
Third, a new approach to implement Distributed Snapshot Isolation is devised; we refer to this approach as Incremental. The results of comprehensive performance experiments with the TPC-C benchmark show that the Incremental approach significantly outperforms any other known method from the literature. Furthermore, the Incremental approach requires no a priori knowledge of which nodes of a distributed system are involved in executing a transaction. Also, the Incremental approach can execute transactions that involve data from a single node only with the same efficiency as a centralized database system. This way, the Incremental approach takes advantage of sharding or other ways to improve data locality. The cost for synchronizing transactions in a distributed system is only paid by transactions that actually involve data from several nodes. All these properties make the Incremental approach more practical than related methods proposed in the literature.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Vlachos:2015:CMF, author = "Michail Vlachos and Nikolaos M. Freris and Anastasios Kyrillidis", title = "Compressive mining: fast and optimal data mining in the compressed domain", journal = j-VLDB-J, volume = "24", number = "1", pages = "1--24", month = feb, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0360-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 6 15:25:03 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Real-world data typically contain repeated and periodic patterns. This suggests that they can be effectively represented and compressed using only a few coefficients of an appropriate basis (e.g., Fourier and wavelets). 
However, distance estimation when the data are represented using different sets of coefficients is still a largely unexplored area. This work studies the optimization problems related to obtaining the tightest lower/upper bound on Euclidean distances when each data object is potentially compressed using a different set of orthonormal coefficients. Our technique leads to tighter distance estimates, which translates into more accurate search, learning and mining operations directly in the compressed domain. We formulate the problem of estimating lower/upper distance bounds as an optimization problem. We establish the properties of optimal solutions and leverage the theoretical analysis to develop a fast algorithm to obtain an exact solution to the problem. The suggested solution provides the tightest estimation of the $ L_2$-norm or the correlation. We show that typical data analysis operations, such as $k$-nearest-neighbor search or $k$-Means clustering, can operate more accurately using the proposed compression and distance reconstruction technique. We compare it with many other prevalent compression and reconstruction techniques, including random projections and PCA-based techniques. We highlight a surprising result, namely that when the data are highly sparse in some basis, our technique may even outperform PCA-based compression. The contributions of this work are generic as our methodology is applicable to any sequential or high-dimensional data as well as to any orthogonal data transformation used for the underlying data compression scheme.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sistla:2015:CNN, author = "A. 
Prasad Sistla and Ouri Wolfson and Bo Xu", title = "Continuous nearest-neighbor queries with location uncertainty", journal = j-VLDB-J, volume = "24", number = "1", pages = "25--50", month = feb, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0361-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 6 15:25:03 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we consider the problem of evaluating the continuous query of finding the $k$ nearest objects with respect to a given point object $O_q$ among a set of $n$ moving point-objects. The query returns a sequence of answer-pairs, namely pairs of the form $ (I, S) $ such that $I$ is a time interval and $S$ is the set of objects that are closest to $ O_q$ during $I$. When there is uncertainty associated with the locations of the moving objects, $S$ is the set of all the objects that are possibly the $k$ nearest neighbors. We analyze the lower bound and the upper bound on the maximum number of answer-pairs, for the certain case and the uncertain case, respectively. Then, we consider two different types of algorithms. The first is off-line algorithms that compute a priori all the answer-pairs. The second type is on-line algorithms that at any time return the current answer-pair. We present algorithms for the certain case and the uncertain case, respectively, and analyze their complexity.
We experimentally compare different algorithms using a database of 1 million objects derived from real-world GPS traces.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gur:2015:SFA, author = "Izzeddin G{\"u}r and Mehmet G{\"u}vercin and Hakan Ferhatosmanoglu", title = "Scaling forecasting algorithms using clustered modeling", journal = j-VLDB-J, volume = "24", number = "1", pages = "51--65", month = feb, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0363-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 6 15:25:03 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Research on forecasting has traditionally focused on building more accurate statistical models for a given time series. The models are mostly applied to limited data due to efficiency and scalability problems. However, many enterprise applications require scalable forecasting on large number of data series. For example, telecommunication companies need to forecast each of their customers' traffic load to understand their usage behavior and to tailor targeted campaigns. Forecasting models are typically applied on aggregate data to estimate the total traffic volume for revenue estimation and resource planning. However, they cannot be easily applied to each user individually as building accurate models for large number of users would be time consuming. The problem is exacerbated when the forecasting process is continuous and the models need to be updated periodically. This paper addresses the problem of building and updating forecasting models continuously for multiple data series. We propose dynamic clustered modeling for forecasting by utilizing representative models as an analogy to cluster centers. 
We apply the models to each individual series through iterative nonlinear optimization. We develop two approaches: The Integrated Clustered Modeling integrates clustering and modeling simultaneously, and the Sequential Clustered Modeling applies them sequentially. Our findings indicate that modeling an individual's behavior using its segment can be more scalable and accurate than the individual model itself. The grouped models avoid overfits and capture common motifs even on noisy data. Experimental results from a telco CRM application show the method is efficient and scalable, and also more accurate than having separate individual models.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kaoudi:2015:RCS, author = "Zoi Kaoudi and Ioana Manolescu", title = "{RDF} in the clouds: a survey", journal = j-VLDB-J, volume = "24", number = "1", pages = "67--91", month = feb, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0364-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 6 15:25:03 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The Resource Description Framework (RDF) pioneered by the W3C is increasingly being adopted to model data in a variety of scenarios, in particular data to be published or exchanged on the Web. Managing large volumes of RDF data is challenging, due to the sheer size, the heterogeneity, and the further complexity brought by RDF reasoning. To tackle the size challenge, distributed storage architectures are required. Cloud computing is an emerging paradigm massively adopted in many applications for the scalability, fault-tolerance, and elasticity feature it provides, enabling the easy deployment of distributed and parallel architectures. 
In this article, we survey RDF data management architectures and systems designed for a cloud environment, and more generally, those large-scale RDF data management systems that can be easily deployed therein. We first give the necessary background, then describe the existing systems and proposals in this area, and classify them according to dimensions related to their capabilities and implementation techniques. The survey ends with a discussion of open problems and perspectives.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Su:2015:CTD, author = "Han Su and Kai Zheng and Jiamin Huang and Haozhou Wang and Xiaofang Zhou", title = "Calibrating trajectory data for spatio-temporal similarity analysis", journal = j-VLDB-J, volume = "24", number = "1", pages = "93--116", month = feb, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0365-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 6 15:25:03 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Due to the prevalence of GPS-enabled devices and wireless communications technologies, spatial trajectories that describe the movement history of moving objects are being generated and accumulated at an unprecedented pace. Trajectory data in a database are intrinsically heterogeneous, as they represent discrete approximations of original continuous paths derived using different sampling strategies and different sampling rates. Such heterogeneity can have a negative impact on the effectiveness of trajectory similarity measures, which are the basis of many crucial trajectory processing tasks. 
In this paper, we pioneer a systematic approach to trajectory calibration that is a process to transform a heterogeneous trajectory dataset to one with (almost) unified sampling strategies. Specifically, we propose an anchor-based calibration system that aligns trajectories to a set of anchor points, which are fixed locations independent of trajectory data. After examining four different types of anchor points for the purpose of building a stable reference system, we propose a spatial-only geometry-based calibration approach that considers the spatial relationship between anchor points and trajectories. Then a more advanced spatial-only model-based calibration method is presented, which exploits the power of machine learning techniques to train inference models from historical trajectory data to improve calibration effectiveness. Afterward, since trajectory has temporal information, we extend these two spatial-only trajectory calibration algorithms to incorporate the temporal information, which can infer a proper time stamp to each anchor point of a calibrated trajectory. At last, we provide a solution to reduce cost, i.e., the number of trajectories that is necessary to be re-calibrated, of the updating of the reference system. Finally, we conduct extensive experiments using real trajectory datasets to demonstrate the effectiveness and efficiency of the proposed calibration system.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2015:CAI, author = "Hui Li and Sourav S. 
Bhowmick and Aixin Sun and Jiangtao Cui", title = "Conformity-aware influence maximization in online social networks", journal = j-VLDB-J, volume = "24", number = "1", pages = "117--141", month = feb, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0366-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 6 15:25:03 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Influence maximization (IM) is the problem of finding a small subset of nodes (seed nodes) in a social network that could maximize the spread of influence. Despite the progress achieved by state-of-the-art greedy IM techniques, they suffer from two key limitations. Firstly, they are inefficient as they can take days to find seeds in very large real-world networks. Secondly, although extensive research in social psychology suggests that humans will readily conform to the wishes or beliefs of others, surprisingly, existing IM techniques are conformity-unaware. That is, they only utilize an individual's ability to influence another but ignore conformity (a person's inclination to be influenced) of the individuals. In this paper, we propose a novel conformity-aware cascade ($ C^2$) model which leverages on the interplay between influence and conformity in obtaining the influence probabilities of nodes from underlying data for estimating influence spreads. We also propose a variant of this model called $ C^3$ model that supports context-specific influence and conformity of nodes. A salient feature of these models is that they are aligned to the popular social forces principle in social psychology. Based on these models, we propose a novel greedy algorithm called CINEMA that generates high-quality seed set for the IM problem.
It first partitions the network into a set of non-overlapping subnetworks and for each of these subnetworks it computes the influence and conformity indices of nodes by analyzing the sentiments expressed by individuals. Each subnetwork is then associated with a cog-sublist which stores the marginal gains of the nodes in the subnetwork in descending order. The node with maximum marginal gain in each cog-sublist is stored in a data structure called mag-list. These structures are manipulated by CINEMA to efficiently find the seed set. A key feature of such partitioning-based strategy is that each node's influence computation and updates can be limited to the subnetwork it resides instead of the entire network. This paves way for seamless adoption of CINEMA on a distributed platform. Our empirical study with real-world social networks comprising of millions of nodes demonstrates that CINEMA as well as its context-aware and distributed variants generate superior quality seed set compared to state-of-the-art IM approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Deng:2015:UFA, author = "Dong Deng and Guoliang Li and Jianhua Feng and Yi Duan and Zhiguo Gong", title = "A unified framework for approximate dictionary-based entity extraction", journal = j-VLDB-J, volume = "24", number = "1", pages = "143--167", month = feb, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0367-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 6 15:25:03 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Dictionary-based entity extraction identifies predefined entities (e.g., person names or locations) from documents.
A recent trend for improving extraction recall is to support approximate entity extraction, which finds all substrings from documents that approximately match entities in a given dictionary. Existing methods to address this problem support either token-based similarity (e.g., Jaccard Similarity) or character-based dissimilarity (e.g., Edit Distance). It calls for a unified method to support various similarity/dissimilarity functions, since a unified method can reduce the programming efforts, the hardware requirements, and the manpower. In this paper, we propose a unified framework to support various similarity/dissimilarity functions, such as jaccard similarity, cosine similarity, dice similarity, edit similarity, and edit distance. Since many real-world applications have high-performance requirement for approximate entity extraction on data streams (e.g., Twitter), we focus on devising efficient algorithms to achieve high performance. We find that many substrings in documents have overlaps, and we can utilize the shared computation across the overlaps to avoid unnecessary redundant computation. To this end, we propose efficient filtering algorithms and develop effective pruning techniques. 
Experimental results show our method achieves high performance and outperforms state-of-the-art studies significantly.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hung:2015:CAC, author = "Chih-Chieh Hung and Wen-Chih Peng and Wang-Chien Lee", title = "Clustering and aggregating clues of trajectories for mining trajectory patterns and routes", journal = j-VLDB-J, volume = "24", number = "2", pages = "169--192", month = apr, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-011-0262-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Mar 18 19:14:35 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we propose a new trajectory pattern mining framework, namely Clustering and Aggregating Clues of Trajectories (CACT), for discovering trajectory routes that represent the frequent movement behaviors of a user. In addition to spatial and temporal biases, we observe that trajectories contain silent durations, i.e., the time durations when no data points are available to describe the movements of users, which bring many challenging issues to trajectory pattern mining. We claim that a movement behavior would leave some clues in its various sampled/observed trajectories. These clues may be extracted from spatially and temporally co-located data points from the observed trajectories. Based on this observation, we propose clue-aware trajectory similarity to measure the clues between two trajectories. Accordingly, we further propose the clue-aware trajectory clustering algorithm to cluster similar trajectories into groups to capture the movement behaviors of the user. 
Finally, we devise the clue-aware trajectory aggregation algorithm to aggregate trajectories in the same group to derive the corresponding trajectory pattern and route. We validate our ideas and evaluate the proposed CACT framework by experiments using both synthetic and real datasets. The experimental results show that CACT is more effective in discovering trajectory patterns than the state-of-the-art techniques for mining trajectory patterns.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Eichinger:2015:TSC, author = "Frank Eichinger and Pavel Efros and Stamatis Karnouskos and Klemens B{\"o}hm", title = "A time-series compression technique and its application to the smart grid", journal = j-VLDB-J, volume = "24", number = "2", pages = "193--218", month = apr, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0368-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Mar 18 19:14:35 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Time-series data is increasingly collected in many domains. One example is the smart electricity infrastructure, which generates huge volumes of such data from sources such as smart electricity meters. Although today these data are used for visualization and billing in mostly 15-min resolution, its original temporal resolution frequently is more fine-grained, e.g., seconds. This is useful for various analytical applications such as short-term forecasting, disaggregation and visualization. However, transmitting and storing huge amounts of such fine-grained data are prohibitively expensive in terms of storage space in many cases. 
In this article, we present a compression technique based on piecewise regression and two methods which describe the performance of the compression. Although our technique is a general approach for time-series compression, smart grids serve as our running example and as our evaluation scenario. Depending on the data and the use-case scenario, the technique compresses data by ratios of up to factor 5,000 while maintaining its usefulness for analytics. The proposed technique has outperformed related work and has been applied to three real-world energy datasets in different scenarios. Finally, we show that the proposed compression technique can be implemented in a state-of-the-art database management system.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xue:2015:SDS, author = "Andy Yuan Xue and Jianzhong Qi and Xing Xie and Rui Zhang and Jin Huang and Yuan Li", title = "Solving the data sparsity problem in destination prediction", journal = j-VLDB-J, volume = "24", number = "2", pages = "219--243", month = apr, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0369-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Mar 18 19:14:35 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Destination prediction is an essential task for many emerging location-based applications such as recommending sightseeing places and targeted advertising according to destinations. A common approach to destination prediction is to derive the probability of a location being the destination based on historical trajectories. However, almost all the existing techniques use various kinds of extra information such as road network, proprietary travel planner, statistics requested from government, and personal driving habits. 
Such extra information, in most circumstances, is unavailable or very costly to obtain. Thereby we approach the task of destination prediction by using only historical trajectory dataset. However, this approach encounters the ``data sparsity problem'', i.e., the available historical trajectories are far from enough to cover all possible query trajectories, which considerably limits the number of query trajectories that can obtain predicted destinations. We propose a novel method named Sub-Trajectory Synthesis (SubSyn) to address the data sparsity problem. SubSyn first decomposes historical trajectories into sub-trajectories comprising two adjacent locations, and then connects the sub-trajectories into ``synthesised'' trajectories. This process effectively expands the historical trajectory dataset to contain much more trajectories. Experiments based on real datasets show that SubSyn can predict destinations for up to ten times more query trajectories than a baseline prediction algorithm. 
Furthermore, the running time of the SubSyn-training algorithm is almost negligible for a large set of 1.9 million trajectories, and the SubSyn-prediction algorithm runs over two orders of magnitude faster than the baseline prediction algorithm constantly.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2015:ECS, author = "Zhiwei Zhang and Jeffrey Xu Yu and Lu Qin and Lijun Chang and Xuemin Lin", title = "{I/O} efficient: computing {SCCs} in massive graphs", journal = j-VLDB-J, volume = "24", number = "2", pages = "245--270", month = apr, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0372-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Mar 18 19:14:35 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A strongly connected component ($ \mathsf {SCC}$) is a maximal subgraph of a directed graph $G$ in which every pair of nodes is reachable from each other in the $ \mathsf {SCC}$. With such a property, a general directed graph can be represented by a directed acyclic graph (DAG) by contracting every $ \mathsf {SCC}$ of $G$ to a node in DAG. In many real applications that need graph pattern matching, topological sorting, or reachability query processing, the best way to deal with a general directed graph is to deal with its DAG representation. Therefore, finding all $ \mathsf {SCC}$s in a directed graph $G$ is a critical operation. The existing in-memory algorithms based on depth first search (DFS) can find all $ \mathsf {SCC}$s in linear time with respect to the size of a graph. However, when a graph cannot reside entirely in the main memory, the existing external or semi-external algorithms to find all $ \mathsf {SCC}$s have limitation to achieve high I/O efficiency.
In this paper, we study new I/O-efficient semi-external algorithms to find all $ \mathsf {SCC}$s for a massive directed graph $G$ that cannot reside in main memory entirely. To overcome the deficiency of the existing DFS-based semi-external algorithm that heavily relies on a total order, we explore a weak order based on which we investigate new algorithms. We propose a new two-phase algorithm, namely, tree construction and tree search. In the tree construction phase, a spanning tree of $G$ can be constructed in bounded number of sequential scans of $G$. In the tree search phase, it needs to sequentially scan the graph once to find all $ \mathsf {SCC}$s. In addition, we propose a new single-phase algorithm, which combines the tree construction and tree search phases into a single phase, with three new optimization techniques. They are early acceptance, early rejection, and batch processing. By the single-phase algorithm with the new optimization techniques, we can significantly reduce the number of I/Os and the CPU cost. We prove the correctness of the algorithms.
We conduct extensive experimental studies using 4 real datasets including a massive real dataset and several synthetic datasets to confirm the I/O efficiency of our approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yuan:2015:GSS, author = "Ye Yuan and Guoren Wang and Lei Chen and Haixun Wang", title = "Graph similarity search on large uncertain graph databases", journal = j-VLDB-J, volume = "24", number = "2", pages = "271--296", month = apr, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0373-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Mar 18 19:14:35 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Many studies have been conducted on seeking an efficient solution for graph similarity search over certain (deterministic) graphs due to its wide application in many fields, including bioinformatics, social network analysis, and Resource Description Framework data management. All prior work assumes that the underlying data is deterministic. However, in reality, graphs are often noisy and uncertain due to various factors, such as errors in data extraction, inconsistencies in data integration, and for privacy-preserving purposes. Therefore, in this paper, we study similarity graph containment search on large uncertain graph databases. Similarity graph containment search consists of subgraph similarity search and supergraph similarity search. Different from previous works assuming that edges in an uncertain graph are independent of each other, we study uncertain graphs where edges' occurrences are correlated. 
We formally prove that subgraph or supergraph similarity search over uncertain graphs is \#P-hard; thus, we employ a filter-and-verify framework to speed up these two queries. For the subgraph similarity query, in the filtering phase, we develop tight lower and upper bounds of subgraph similarity probability based on a probabilistic matrix index (PMI). PMI is composed of discriminative subgraph features associated with tight lower and upper bounds of subgraph isomorphism probability. Based on PMI, we can filter out a large number of uncertain graphs and maximize the pruning capability. During the verification phase, we develop an efficient sampling algorithm to validate the remaining candidates. For the supergraph similarity query, in the filtering phase, we propose two pruning algorithms, one lightweight and the other strong, based on maximal common subgraphs of query graph and data graph. We run the two pruning algorithms against a probabilistic index that consists of powerful graph features. In the verification, we design an approximate algorithm based on the Horvitz--Thompson estimator to fast validate the remaining candidates. The efficiencies of our proposed solutions to the subgraph and supergraph similarity search have been verified through extensive experiments on real uncertain graph datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yang:2015:TPC, author = "Bin Yang and Chenjuan Guo and Yu Ma and Christian S.
Jensen", title = "Toward personalized, context-aware routing", journal = j-VLDB-J, volume = "24", number = "2", pages = "297--318", month = apr, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0378-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Mar 18 19:14:35 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A driver's choice of a route to a destination may depend on the route's length and travel time, but a multitude of other, possibly hard-to-formalize aspects, may also factor into the driver's decision. There is evidence that a driver's choice of route is context dependent, e.g., varies across time, and that route choice also varies from driver to driver. In contrast, conventional routing services support little in the way of context dependence, and they deliver the same routes to all drivers. We study how to identify context-aware driving preferences for individual drivers from historical trajectories, and thus how to provide foundations for personalized navigation, but also professional driver education and traffic planning. We provide techniques that are able to capture time-dependent and uncertain properties of dynamic travel costs, such as travel time and fuel consumption, from trajectories, and we provide techniques capable of capturing the driving behaviors of different drivers in terms of multiple dynamic travel costs. Further, we propose techniques that are able to identify a driver's contexts and then to identify driving preferences for each context using historical trajectories from the driver. 
Empirical studies with a large trajectory data set offer insight into the design properties of the proposed techniques and suggest that they are effective.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Huang:2015:TKS, author = "Xin Huang and Hong Cheng and Rong-Hua Li and Lu Qin and Jeffrey Xu Yu", title = "Top-{$K$} structural diversity search in large networks", journal = j-VLDB-J, volume = "24", number = "3", pages = "319--343", month = jun, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0379-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri May 15 17:21:03 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Social contagion depicts a process of information (e.g., fads, opinions, news) diffusion in the online social networks. A recent study reports that in a social contagion process, the probability of contagion is tightly controlled by the number of connected components in an individual's neighborhood. Such a number is termed structural diversity of an individual, and it is shown to be a key predictor in the social contagion process. Based on this, a fundamental issue in a social network is to find top-kk users with the highest structural diversities. In this paper, we, for the first time, study the top-kk structural diversity search problem in a large network. Specifically, we study two types of structural diversity measures, namely, component-based structural diversity measure and core-based structural diversity measure. For component-based structural diversity, we develop an effective upper bound of structural diversity for pruning the search space. The upper bound can be incrementally refined in the search process. 
Based on such upper bound, we propose an efficient framework for top-$k$ structural diversity search. To further speed up the structural diversity evaluation in the search process, several carefully devised search strategies are proposed. We also design efficient techniques to handle frequent updates in dynamic networks and maintain the top-$k$ results. We further show how the techniques proposed in component-based structural diversity measure can be extended to handle the core-based structural diversity measure. Extensive experimental studies are conducted in real-world large networks and synthetic graphs, and the results demonstrate the efficiency and effectiveness of the proposed methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Papapetrou:2015:SDS, author = "Odysseas Papapetrou and Minos Garofalakis and Antonios Deligiannakis", title = "Sketching distributed sliding-window data streams", journal = j-VLDB-J, volume = "24", number = "3", pages = "345--368", month = jun, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0380-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri May 15 17:21:03 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "While traditional data management systems focus on evaluating single, ad hoc queries over static data sets in a centralized setting, several emerging applications require (possibly, continuous) answers to queries on dynamic data that is widely distributed and constantly updated. Furthermore, such query answers often need to discount data that is ``stale'' and operate solely on a sliding window of recent data arrivals (e.g., data updates occurring over the last 24 h).
Such distributed data streaming applications mandate novel algorithmic solutions that are both time and space efficient (to manage high-speed data streams) and also communication efficient (to deal with physical data distribution). In this paper, we consider the problem of complex query answering over distributed, high-dimensional data streams in the sliding-window model. We introduce a novel sketching technique (termed ECM-sketch) that allows effective summarization of streaming data over both time-based and count-based sliding windows with probabilistic accuracy guarantees. Our sketch structure enables point, as well as inner product, queries and can be employed to address a broad range of problems, such as maintaining frequency statistics, finding heavy hitters, and computing quantiles in the sliding-window model. Focusing on distributed environments, we demonstrate how ECM-sketches of individual, local streams can be composed to generate a (low-error) ECM-sketch summary of the order-preserving merging of all streams; furthermore, we show how ECM-sketches can be exploited for continuous monitoring of sliding-window queries over distributed streams. Our extensive experimental study with two real-life data sets validates our theoretical claims and verifies the effectiveness of our techniques. 
To the best of our knowledge, ours is the first work to address efficient, guaranteed-error complex query answering over distributed data streams in the sliding-window model.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yuan:2015:EDS, author = "Ye Yuan and Guoren Wang and Jeffery Yu Xu and Lei Chen", title = "Efficient distributed subgraph similarity matching", journal = j-VLDB-J, volume = "24", number = "3", pages = "369--394", month = jun, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0381-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri May 15 17:21:03 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Given a query graph $q$ and a data graph $G$, subgraph similarity matching is to retrieve all matches of $q$ in $G$ with the number of missing edges bounded by a given threshold $ \epsilon $. Many works have been conducted to study the problem of subgraph similarity matching due to its ability to handle applications involved with noisy or erroneous graph data. In practice, a data graph can be extremely large, e.g., a web-scale graph containing hundreds of millions of vertices and billions of edges. The state-of-the-art approaches employ centralized algorithms to process the subgraph similarity queries, and thus, they are infeasible for such a large graph due to the limited computational power and storage space of a centralized server. To address this problem, in this paper, we investigate subgraph similarity matching for a web-scale graph deployed in a distributed environment.
We propose distributed algorithms and optimization techniques that exploit the properties of subgraph similarity matching, so that we can well utilize the parallel computing power and lower the communication cost among the distributed data centers for query processing. Specifically, we first relax and decompose $q$ into a minimum number of sub-queries. Next, we send each sub-query to conduct the exact matching in parallel. Finally, we schedule and join the exact matches to obtain final query answers. Moreover, our workload-balance strategy further speeds up the query processing. Our experimental results demonstrate the feasibility of our proposed approach in performing subgraph similarity matching over web-scale graph data.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mirylenka:2015:CHH, author = "Katsiaryna Mirylenka and Graham Cormode and Themis Palpanas and Divesh Srivastava", title = "Conditional heavy hitters: detecting interesting correlations in data streams", journal = j-VLDB-J, volume = "24", number = "3", pages = "395--414", month = jun, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0382-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri May 15 17:21:03 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The notion of heavy hitters--items that make up a large fraction of the population--has been successfully used in a variety of applications across sensor and RFID monitoring, network data analysis, event mining, and more. Yet this notion often fails to capture the semantics we desire when we observe data in the form of correlated pairs. Here, we are interested in items that are conditionally frequent: when a particular item is frequent within the context of its parent item.
In this work, we introduce and formalize the notion of conditional heavy hitters to identify such items, with applications in network monitoring and Markov chain modeling. We explore the relationship between conditional heavy hitters and other related notions in the literature, and show analytically and experimentally the usefulness of our approach. We introduce several algorithm variations that allow us to efficiently find conditional heavy hitters for input data with very different characteristics, and provide analytical results for their performance. Finally, we perform experimental evaluations with several synthetic and real datasets to demonstrate the efficacy of our methods and to study the behavior of the proposed algorithms for different types of data.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gao:2015:ECP, author = "Yunjun Gao and Lu Chen and Xinhan Li and Bin Yao and Gang Chen", title = "Efficient {$k$}-closest pair queries in general metric spaces", journal = j-VLDB-J, volume = "24", number = "3", pages = "415--439", month = jun, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0383-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri May 15 17:21:03 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Given two object sets $P$ and $Q$, a $k$-closest pair ($k$CP) query finds $k$ closest object pairs from $ P \times Q $. This operation is common in many real-life applications such as GIS, data mining, and recommender systems. Although it has received much attention in the Euclidean space, there is little prior work on the metric space.
In this paper, we study the problem of $k$CP query processing in general metric spaces, namely Metric $k$CP (M$k$CP) search, and propose several efficient algorithms using dynamic disk-based metric indexes (e.g., M-tree), which can be applied to arbitrary type of data as long as a certain metric distance is defined and satisfies the triangle inequality. Our approaches follow depth-first and/or best-first traversal paradigm(s), employ effective pruning rules based on metric space properties and the counting information preserved in the metric index, take advantage of aggressive pruning and compensation to further boost query efficiency, and derive a node-based cost model for M$k$CP retrieval. In addition, we extend our techniques to tackle two interesting variants of M$k$CP queries. Extensive experiments with both real and synthetic data sets demonstrate the performance of our proposed algorithms, the effectiveness of our developed pruning rules, and the accuracy of our presented cost model.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Aksoy:2015:RPE, author = "Cem Aksoy and Aggeliki Dimitriou and Dimitri Theodoratos", title = "Reasoning with patterns to effectively answer {XML} keyword queries", journal = j-VLDB-J, volume = "24", number = "3", pages = "441--465", month = jun, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0384-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri May 15 17:21:03 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Keyword search is a popular technique for searching tree-structured data on the Web because it frees the user from knowing a complex query language and the structure of the data sources.
However, the imprecision of the keyword queries usually results in a very large number of results of which only a few are relevant to the query. Multiple previous approaches have tried to address this problem. They exploit the structural properties of the tree data in order to filter out irrelevant results. This is not an easy task though, and in the general case, these approaches show low precision and/or recall and low quality of result ranking. In this paper, we argue that exploiting the structural relationships of the query matches locally in the data tree is not sufficient and a global analysis of the keyword matches in the data tree is necessary in order to assign meaningful semantics to keyword queries. We present an original approach for answering keyword queries which extracts structural patterns of the query matches and reasons with them in order to return meaningful results ranked with respect to their relevance to the query. Comparisons between patterns are realized based on different types of homomorphisms between patterns. As the number of patterns is typically much smaller than that of the of query matches, this global reasoning is feasible. We design an efficient stack-based algorithm for evaluating keyword queries on tree-structured data, and we also devise a heuristic extension which further improves its performance. We run comprehensive experiments on different datasets to evaluate the efficiency of the algorithms and the effectiveness of our ranking and filtering semantics. 
The experimental results show that our approach produces results of higher quality compared to previous ones and our algorithms are fast and scale well with respect to the input and output size.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Roy:2015:TAO, author = "Senjuti Basu Roy and Ioanna Lykourentzou and Saravanan Thirumuruganathan and Sihem Amer-Yahia and Gautam Das", title = "Task assignment optimization in knowledge-intensive crowdsourcing", journal = j-VLDB-J, volume = "24", number = "4", pages = "467--491", month = aug, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0385-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Aug 8 13:52:45 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We present SmartCrowd, a framework for optimizing task assignment in knowledge-intensive crowdsourcing (KI-C). SmartCrowd distinguishes itself by formulating, for the first time, the problem of worker-to-task assignment in KI-C as an optimization problem, by proposing efficient adaptive algorithms to solve it and by accounting for human factors, such as worker expertise, wage requirements, and availability inside the optimization process. We present rigorous theoretical analyses of the task assignment optimization problem and propose optimal and approximation algorithms with guarantees, which rely on index pre-computation and adaptive maintenance. 
We perform extensive performance and quality experiments using real and synthetic data to demonstrate that the SmartCrowd approach is necessary to achieve efficient task assignments of high-quality under guaranteed cost budget.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bao:2015:GFR, author = "Zhifeng Bao and Yong Zeng and Tok Wang Ling and Dongxiang Zhang and Guoliang Li and H. V. Jagadish", title = "A general framework to resolve the {MisMatch} problem in {XML} keyword search", journal = j-VLDB-J, volume = "24", number = "4", pages = "493--518", month = aug, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0386-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Aug 8 13:52:45 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "When users issue a query to a database, they have expectations about the results. If what they search for is unavailable in the database, the system will return an empty result or, worse, erroneous mismatch results. We call this problem the MisMatch problem. In this paper, we solve the MisMatch problem in the context of XML keyword search. Our solution is based on two novel concepts that we introduce: target node type and Distinguishability. Target Node Type represents the type of node a query result intends to match, and Distinguishability is used to measure the importance of the query keywords. Using these concepts, we develop a low-cost post-processing algorithm on the results of query evaluation to detect the MisMatch problem and generate helpful suggestions to users. 
Our approach has three noteworthy features: (1) for queries with the MisMatch problem, it generates the explanation, suggested queries and their sample results as the output to users, helping users judge whether the MisMatch problem is solved without reading all query results; (2) it is portable as it can work with any lowest common ancestor-based matching semantics (for XML data without ID references) or minimal Steiner tree-based matching semantics (for XML data with ID references) which return tree structures as results. It is orthogonal to the choice of result retrieval method adopted; (3) it is lightweight in the way that it occupies a very small proportion of the whole query evaluation time. Extensive experiments on three real datasets verify the effectiveness, efficiency and scalability of our approach. A search engine called XClear has been built and is available at http://xclear.comp.nus.edu.sg.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kotsifakos:2015:EBS, author = "Alexios Kotsifakos and Isak Karlsson and Panagiotis Papapetrou and Vassilis Athitsos and Dimitrios Gunopulos", title = "Embedding-based subsequence matching with gaps --- range --- tolerances: a {Query-By-Humming} application", journal = j-VLDB-J, volume = "24", number = "4", pages = "519--536", month = aug, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0387-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Aug 8 13:52:45 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We present a subsequence matching framework that allows for gaps in both query and target sequences, employs variable matching tolerance efficiently tuned for each query and target sequence, and constrains the maximum matching range. 
Using this framework, a dynamic programming method is proposed, called SMBGT, that, given a short query sequence Q and a large database, identifies in quadratic time the subsequence of the database that best matches Q. SMBGT is highly applicable to music retrieval. However, in Query-By-Humming applications, runtime is critical. Hence, we propose a novel embedding-based approach, called ISMBGT, for speeding up search under SMBGT. Using a set of reference sequences, ISMBGT maps both Q and each position of each database sequence into vectors. The database vectors closest to the query vector are identified, and SMBGT is then applied between Q and the subsequences that correspond to those database vectors. The key novelties of ISMBGT are that it does not require training, it is query sensitive, and it exploits the flexibility of SMBGT. We present an extensive experimental evaluation using synthetic and hummed queries on a large music database. Our findings show that ISMBGT can achieve speedups of up to an order of magnitude against brute-force search and over an order of magnitude against cDTW, while maintaining a retrieval accuracy very close to that of brute-force search.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Skovsgaard:2015:FTR, author = "Anders Skovsgaard and Christian S. 
Jensen", title = "Finding top-$k$ relevant groups of spatial web objects", journal = j-VLDB-J, volume = "24", number = "4", pages = "537--555", month = aug, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0388-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Aug 8 13:52:45 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The web is increasingly being accessed from geo-positioned devices such as smartphones, and rapidly increasing volumes of web content are geo-tagged. In addition, studies show that a substantial fraction of all web queries has local intent. This development motivates the study of advanced spatial keyword-based querying of web content. Previous research has primarily focused on the retrieval of the top-k individual spatial web objects that best satisfy a query specifying a location and a set of keywords. This paper proposes a new type of query functionality that returns top-k groups of objects while taking into account aspects such as group density, distance to the query, and relevance to the query keywords. To enable efficient processing, novel indexing and query processing techniques for single and multiple keyword queries are proposed. 
Empirical performance studies with an implementation of the techniques and real data suggest that the proposals are viable in practical settings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Abedjan:2015:PRD, author = "Ziawasch Abedjan and Lukasz Golab and Felix Naumann", title = "Profiling relational data: a survey", journal = j-VLDB-J, volume = "24", number = "4", pages = "557--581", month = aug, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0389-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Aug 8 13:52:45 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Profiling data to determine metadata about a given dataset is an important and frequent activity of any IT professional and researcher and is necessary for various use-cases. It encompasses a vast array of methods to examine datasets and produce metadata. Among the simpler results are statistics, such as the number of null values and distinct values in a column, its data type, or the most frequent patterns of its data values. Metadata that are more difficult to compute involve multiple columns, namely correlations, unique column combinations, functional dependencies, and inclusion dependencies. Further techniques detect conditional properties of the dataset at hand. This survey provides a classification of data profiling tasks and comprehensively reviews the state of the art for each class. In addition, we review data profiling tools and systems from research and industry. 
We conclude with an outlook on the future of data profiling beyond traditional profiling tasks and beyond relational databases.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Deutch:2015:PBA, author = "Daniel Deutch and Yuval Moskovitch and Val Tannen", title = "Provenance-based analysis of data-centric processes", journal = j-VLDB-J, volume = "24", number = "4", pages = "583--607", month = aug, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0390-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Aug 8 13:52:45 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We consider in this paper static analysis of the possible executions of data-dependent applications, namely applications whose control flow is guided by a finite-state machine, as well as by the state of an underlying database. We note that previous work in this context has not addressed two important features of such analysis, namely analysis under hypothetical scenarios, such as changes to the application's state machine and/or to the underlying database, and the consideration of meta-data, such as cost or access privileges. Observing that semiring-based provenance has been proven highly effective in supporting these two features for database queries, we develop in this paper a semiring-based provenance framework for the analysis of data-dependent processes, accounting for hypothetical reasoning and meta-data. The development addresses two interacting new challenges: (1) combining provenance annotations for both information that resides in the database and information about external inputs (e.g., user choices) and (2) finitely capturing infinitely many process executions. 
We have implemented our framework as part of the PROPOLIS system.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bohlen:2015:SIB, author = "Michael H. B{\"o}hlen and Christoph Koch", title = "Special issue on best papers of {VLDB 2013}", journal = j-VLDB-J, volume = "24", number = "5", pages = "609--610", month = oct, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0401-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Sep 18 06:51:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yan:2015:ALK, author = "Zhepeng Yan and Nan Zheng and Zachary G. Ives and Partha Pratim Talukdar and Cong Yu", title = "Active learning in keyword search-based data integration", journal = j-VLDB-J, volume = "24", number = "5", pages = "611--631", month = oct, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0374-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Sep 18 06:51:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The problem of scaling up data integration, such that new sources can be quickly utilized as they are discovered, remains elusive: Global schemas for integrated data are difficult to develop and expand, and schema and record matching techniques are limited by the fact that data and metadata are often under-specified and must be disambiguated by data experts. 
One promising approach is to avoid using a global schema, and instead to develop keyword search-based data integration--where the system lazily discovers associations enabling it to join together matches to keywords, and return ranked results. The user is expected to understand the data domain and provide feedback about answers' quality. The system generalizes such feedback to learn how to correctly integrate data. A major open challenge is that under this model, the user only sees and offers feedback on a few ``top-$k$'' results: This result set must be carefully selected to include answers of high relevance and answers that are highly informative when feedback is given on them. Existing systems merely focus on predicting relevance, by composing the scores of various schema and record matching algorithms. In this paper, we show how to predict the uncertainty associated with a query result's score, as well as how informative feedback is on a given result. We build upon these foundations to develop an active learning approach to keyword search-based data integration, and we validate the effectiveness of our solution over real data from several very different domains.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zou:2015:CDA, author = "Tao Zou and Ronan Bras and Marcos Vaz Salles and Alan Demers and Johannes Gehrke", title = "{ClouDiA}: a deployment advisor for public clouds", journal = j-VLDB-J, volume = "24", number = "5", pages = "633--653", month = oct, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0375-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Sep 18 06:51:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "An increasing number of distributed data-driven applications are
moving into shared public clouds. By sharing resources and operating at scale, public clouds promise higher utilization and lower costs than private clusters. To achieve high utilization, however, cloud providers inevitably allocate virtual machine instances non-contiguously; i.e., instances of a given application may end-up in physically distant machines in the cloud. This allocation strategy can lead to large differences in average latency between instances. For a large class of applications, this difference can result in significant performance degradation, unless care is taken in how application components are mapped to instances. In this paper, we propose ClouDiA, a general deployment advisor that selects application node deployments minimizing either (i) the largest latency between application nodes, or (ii) the longest critical path among all application nodes. ClouDiA employs a number of algorithmic techniques, including mixed-integer programming and constraint programming techniques, to efficiently search the space of possible mappings of application nodes to instances. 
Through experiments with synthetic and real applications in Amazon EC2, we show that mean latency is a robust metric to model communication cost in these applications and that our search techniques yield a 15--55\% reduction in time-to-solution or service response time, without any need for modifying application code.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhu:2015:SAP, author = "Fanwei Zhu and Yuan Fang and Kevin Chen-Chuan Chang and Jing Ying", title = "Scheduled approximation for {Personalized PageRank} with {Utility-based Hub Selection}", journal = j-VLDB-J, volume = "24", number = "5", pages = "655--679", month = oct, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0376-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Sep 18 06:51:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "As Personalized PageRank has been widely leveraged for ranking on a graph, the efficient computation of Personalized PageRank Vector (PPV) becomes a prominent issue. In this paper, we propose FastPPV, an approximate PPV computation algorithm that is incremental and accuracy-aware. Our approach hinges on a novel paradigm of scheduled approximation: the computation is partitioned and scheduled for processing in an ``organized'' way, such that we can gradually improve our PPV estimation in an incremental manner and quantify the accuracy of our approximation at query time. Guided by this principle, we develop an efficient hub-based realization, where we adopt the metric of hub length to partition and schedule random walk tours so that the approximation error reduces exponentially over iterations.
In addition, as tours are segmented by hubs, the shared substructures between different tours (around the same hub) can be reused to speed up query processing both within and across iterations. Given the key roles played by the hubs, we further investigate the problem of hub selection. In particular, we develop a conceptual model to select hubs based on the two desirable properties of hubs--sharing and discriminating, and present several different strategies to realize the conceptual model. Finally, we evaluate FastPPV over two real-world graphs, and show that it not only significantly outperforms two state-of-the-art baselines in both online and offline phrases, but also scales well on larger graphs. In particular, we are able to achieve near-constant time online query processing irrespective of graph size.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ren:2015:VLM, author = "Kun Ren and Alexander Thomson and Daniel J. Abadi", title = "{VLL}: a lock manager redesign for main memory database systems", journal = j-VLDB-J, volume = "24", number = "5", pages = "681--705", month = oct, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-014-0377-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Sep 18 06:51:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Lock managers are increasingly becoming a bottleneck in database systems that use pessimistic concurrency control. In this paper, we introduce very lightweight locking (VLL), an alternative approach to pessimistic concurrency control for main memory database systems, which avoids almost all overhead associated with traditional lock manager operations. 
We also propose a protocol called selective contention analysis (SCA), which enables systems implementing VLL to achieve high transactional throughput under high-contention workloads. We implement these protocols both in a traditional single-machine multi-core database server setting and in a distributed database where data are partitioned across many commodity machines in a shared-nothing cluster. Furthermore, we show how VLL and SCA can be extended to enable range locking. Our experiments show that VLL dramatically reduces locking overhead and thereby increases transactional throughput in both settings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Galarraga:2015:FRM, author = "Luis Gal{\'a}rraga and Christina Teflioudi and Katja Hose and Fabian M. Suchanek", title = "Fast rule mining in ontological knowledge bases with {AMIE++}", journal = j-VLDB-J, volume = "24", number = "6", pages = "707--730", month = dec, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0394-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Nov 25 15:38:42 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Recent advances in information extraction have led to huge knowledge bases (KBs), which capture knowledge in a machine-readable format. Inductive logic programming (ILP) can be used to mine logical rules from these KBs, such as ``If two persons are married, then they (usually) live in the same city.'' While ILP is a mature field, mining logical rules from KBs is difficult, because KBs make an open-world assumption. This means that absent information cannot be taken as counterexamples. Our approach AMIE (Gal{\'a}rraga et al.
in WWW, 2013) has shown how rules can be mined effectively from KBs even in the absence of counterexamples. In this paper, we show how this approach can be optimized to mine even larger KBs with more than 12M statements. Extensive experiments show how our new approach, AMIE++, extends to areas of mining that were previously beyond reach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chandra:2015:DGT, author = "Bikash Chandra and Bhupesh Chawda and Biplab Kar and K. V. Reddy and Shetal Shah and S. Sudarshan", title = "Data generation for testing and grading {SQL} queries", journal = j-VLDB-J, volume = "24", number = "6", pages = "731--755", month = dec, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0395-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Nov 25 15:38:42 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Correctness of SQL queries is usually tested by executing the queries on one or more datasets. Erroneous queries are often the results of small changes or mutations of the correct query. A mutation $Q'$ of a query $Q$ is killed by a dataset $D$ if $Q(D) \ne Q'(D)$. Earlier work on the XData system showed how to generate datasets that kill all mutations in a class of mutations that included join type and comparison operation mutations. In this paper, we extend the XData data generation techniques to handle a wider variety of SQL queries and a much larger class of mutations. We have also built a system for grading SQL queries using the datasets generated by XData. We present a study of the effectiveness of the datasets generated by the extended XData approach, using a variety of queries including queries submitted by students as part of a database course.
We show that the XData datasets outperform predefined datasets as well as manual grading done earlier by teaching assistants, while also avoiding the drudgery of manual correction. Thus, we believe that our techniques will be of great value to database course instructors and TAs, particularly to those of MOOCs. It will also be valuable to database application developers and testers for testing SQL queries.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2015:MMO, author = "Chao Li and Gerome Miklau and Michael Hay and Andrew Mcgregor and Vibhor Rastogi", title = "The matrix mechanism: optimizing linear counting queries under differential privacy", journal = j-VLDB-J, volume = "24", number = "6", pages = "757--781", month = dec, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0398-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Nov 25 15:38:42 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Differential privacy is a robust privacy standard that has been successfully applied to a range of data analysis tasks. We describe the matrix mechanism, an algorithm for answering a workload of linear counting queries that adapts the noise distribution to properties of the provided queries. Given a workload, the mechanism uses a different set of queries, called a query strategy, which are answered using a standard Laplace or Gaussian mechanism. Noisy answers to the workload queries are then derived from the noisy answers to the strategy queries. This two-stage process can result in a more complex, correlated noise distribution that preserves differential privacy but increases accuracy. 
We provide a formal analysis of the error of query answers produced by the mechanism and investigate the problem of computing the optimal query strategy in support of a given workload. We show that this problem can be formulated as a rank-constrained semidefinite program. We analyze two seemingly distinct techniques proposed in the literature, whose similar behavior is explained by viewing them as instances of the matrix mechanism. We also describe an extension of the mechanism in which nonnegativity constraints are included in the derivation process and provide experimental evidence of its efficacy.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Armenatzoglou:2015:GSR, author = "Nikos Armenatzoglou and Ritesh Ahuja and Dimitris Papadias", title = "{Geo-Social Ranking}: functions and query processing", journal = j-VLDB-J, volume = "24", number = "6", pages = "783--799", month = dec, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0400-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Nov 25 15:38:42 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Given a query location q, Geo-Social Ranking (GSR) ranks the users of a Geo-Social Network based on their distance to q, the number of their friends in the vicinity of q, and possibly the connectivity of those friends. 
We propose a general GSR framework and four GSR functions that assign scores in different ways: (i) LC, which is a weighted linear combination of social (i.e., friendships) and spatial (i.e., distance to q) aspects, (ii) RC, which is a ratio combination of the two aspects, (iii) HGS, which considers the number of friends in coincident circles centered at q, and (iv) GST, which takes into account triangles of friends in the vicinity of q. We investigate the behavior of the functions, qualitatively assess their results, and study the effects of their parameters. Moreover, for each ranking function, we design a query processing technique that utilizes its specific characteristics to efficiently retrieve the top-k users. Finally, we experimentally evaluate the performance of the top-k algorithms with real and synthetic datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Santini:2015:QSU, author = "Simone Santini", title = "Querying streams using regular expressions: some semantics, decidability, and efficiency issues", journal = j-VLDB-J, volume = "24", number = "6", pages = "801--821", month = dec, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0402-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Nov 25 15:38:42 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper analyzes the decidability and complexity problems that arise when matching regular expressions on infinite streams of sets of symbols. We show that in important application domains, several apparently obvious semantics lead to detecting spurious events (events that are mere artifacts of the semantics) or to missing events of potential interest. 
We single out a class of semantics, of interest in many applications, which we dub use-and-throw: In a use-and-throw semantics, an elementary event can participate in the creation of at most one detected complex event. Many areas of research have identified this as a desirable requirement (we give the examples of databases and video surveillance), but hitherto there has been no systematic study of the characteristics of these semantics, in particular their decidability and algorithmic complexity. This paper is meant to provide at least some initial answers on this subject. We analyze several semantics, provide polynomial algorithms for them, and prove their correctness and their properties.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2015:ATE, author = "Xiang Wang and Ying Zhang and Wenjie Zhang and Xuemin Lin and Wei Wang", title = "{AP-Tree}: efficiently support location-aware {Publish\slash Subscribe}", journal = j-VLDB-J, volume = "24", number = "6", pages = "823--848", month = dec, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0403-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Nov 25 15:38:42 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We investigate the problem of efficiently supporting location-aware Publish/Subscribe (Pub/Sub for short), which is essential in many applications such as location-based recommendation and advertising, thanks to the proliferation of geo-equipped devices and the ensuing location-based social media applications. 
In a location-aware Pub/Sub system (e.g., an e-coupon system), subscribers can register their interest as spatial-keyword subscriptions (e.g., interest in nearby iphone discount); each incoming geo-textual message (e.g., geo-tagged e-coupon) will be delivered to all the relevant subscribers immediately. While there are several prior approaches aiming at providing efficient processing techniques for this problem, their approaches belong to spatial-prioritized indexing method which cannot well exploit the keyword distribution. In addition, their textual filtering techniques are built upon simple variants of traditional inverted indexes, which do not perform well for the textual constraint imposed by the problem. In this paper, we address the above limitations and provide a highly efficient solution based on a novel adaptive index, named AP-Tree. AP-Tree adaptively groups registered subscriptions using keyword and spatial partitions, guided by a cost model. AP-Tree also naturally indexes ordered keyword combinations. Furthermore, we show that our techniques can be extended to process moving spatial-keyword subscriptions, where subscribers can continuously update their locations. We present efficient algorithms to process both stationary and moving subscriptions, which can seamlessly and effectively integrate keyword and spatial partitions. 
Our extensive experiments demonstrate that AP-Tree and its variant AP$^{+}$-Tree can achieve up to an order of magnitude improvement on efficiency compared with prior state-of-the-art methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Basik:2015:STS, author = "Fuat Bas{\i}k and Bu{\u{g}}ra Gedik and Hakan Ferhatosmano{\u{g}}lu and Mert Emin Kalender", title = "{S$^3$-TM}: scalable streaming short text matching", journal = j-VLDB-J, volume = "24", number = "6", pages = "849--866", month = dec, year = "2015", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0404-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Nov 25 15:38:42 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Micro-blogging services have become major venues for information creation, as well as channels of information dissemination. Accordingly, monitoring them for relevant information is a critical capability. This is typically achieved by registering content-based subscriptions with the micro-blogging service. Such subscriptions are long-running queries that are evaluated against the stream of posts. Given the popularity and scale of micro-blogging services like Twitter and Weibo, building a scalable infrastructure to evaluate these subscriptions is a challenge. To address this challenge, we present the S$^3$-TM system for streaming short text matching. S$^3$-TM is organized as a stream processing application, in the form of a data parallel flow graph designed to be run on a data center environment.
It takes advantage of the structure of the publications (posts) and subscriptions to perform the matching in a scalable manner, without broadcasting publications or subscriptions to all of the matcher instances. The basic design of S$^3$-TM uses a scoped multicast for publications and scoped anycast for subscriptions. To further improve throughput, we introduce publication routing algorithms that aim at minimizing the scope of the multicasts. First set of algorithms we develop are based on partitioning the word co-occurrence frequency graph, with the aim of routing posts that include commonly co-occurring words to a small set of matchers. While effective, these algorithms fell short in balancing the load. To address this, we develop the SALB algorithm, which provides better load balance by modeling the load more accurately using the word-to-post bipartite graph. We also develop a subscription placement algorithm, called LASP, to group together similar subscriptions, in order to minimize the subscription matching cost. Furthermore, to achieve good scalability for increasing number of nodes, we introduce techniques to handle workload skew. Finally, we introduce load shedding techniques for handling unexpected load spikes with small impact on the accuracy. Our experimental results show that S$^3$-TM is scalable. Furthermore, the SALB algorithm provides more than $ 2.5 \times $ throughput compared to the baseline multicast and outperforms the graph partitioning-based approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Jagadish:2016:SIB, author = "H. V.
Jagadish and Aoying Zhou", title = "Special issue on best papers of {VLDB 2014}", journal = j-VLDB-J, volume = "25", number = "1", pages = "1--2", month = feb, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0399-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jan 21 17:41:55 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Jiang:2016:EES, author = "Dawei Jiang and Sai Wu and Gang Chen and Beng Chin Ooi and Kian-Lee Tan and Jun Xu", title = "{epiC}: an extensible and scalable system for processing {Big Data}", journal = j-VLDB-J, volume = "25", number = "1", pages = "3--26", month = feb, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0393-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jan 21 17:41:55 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The Big Data problem is characterized by the so-called 3V features: volume --- a huge amount of data, velocity --- a high data ingestion rate, and variety --- a mix of structured data, semi-structured data, and unstructured data. The state-of-the-art solutions to the Big Data problem are largely based on the MapReduce framework (aka its open source implementation Hadoop). Although Hadoop handles the data volume challenge successfully, it does not deal with the data variety well since the programming interfaces and its associated data processing model are inconvenient and inefficient for handling structured data and graph data. This paper presents epiC, an extensible system to tackle the Big Data's data variety challenge. 
epiC introduces a general Actor-like concurrent programming model, independent of the data processing models, for specifying parallel computations. Users process multi-structured datasets with appropriate epiC extensions, and the implementation of a data processing model best suited for the data type and auxiliary code for mapping that data processing model into epiC's concurrent programming model. Like Hadoop, programs written in this way can be automatically parallelized and the runtime system takes care of fault tolerance and inter-machine communications. We present the design and implementation of epiC's concurrent programming model. We also present two customized data processing models, an optimized MapReduce extension and a relational model, on top of epiC. We show how users can leverage epiC to process heterogeneous data by linking different types of operators together. To improve the performance of complex analytic jobs, epiC supports a partition-based optimization technique where data are streamed between the operators to avoid the high I/O overheads. Experiments demonstrate the effectiveness and efficiency of our proposed epiC.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Schuhknecht:2016:EEA, author = "Felix Martin Schuhknecht and Alekh Jindal and Jens Dittrich", title = "An experimental evaluation and analysis of database cracking", journal = j-VLDB-J, volume = "25", number = "1", pages = "27--52", month = feb, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0397-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jan 21 17:41:55 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Database cracking has been an area of active research in recent years. 
The core idea of database cracking is to create indexes adaptively and incrementally as a side product of query processing. Several works have proposed different cracking techniques for different aspects including updates, tuple reconstruction, convergence, concurrency control, and robustness. Our 2014 VLDB paper ``The Uncracked Pieces in Database Cracking'' (PVLDB 7:97--108, 2013/VLDB 2014) was the first comparative study of these different methods by an independent group. In this article, we extend our published experimental study on database cracking and bring it to an up-to-date state. Our goal is to critically review several aspects, identify the potential, and propose promising directions in database cracking. With this study, we hope to expand the scope of database cracking and possibly leverage cracking in database engines other than MonetDB. We repeat several prior database cracking works including the core cracking algorithms as well as three other works on convergence (hybrid cracking), tuple reconstruction (sideways cracking), and robustness (stochastic cracking), respectively. Additionally to our conference paper, we now also look at a recently published study about CPU efficiency (predication cracking). We evaluate these works and show possible directions to do even better. As a further extension, we evaluate the whole class of parallel cracking algorithms that were proposed in three recent works. Altogether, in this work we revisit 8 papers on database cracking and evaluate in total 18 cracking methods, 6 sorting algorithms, and 3 full index structures. Additionally, we test cracking under a variety of experimental settings, including high selectivity (Low selectivity means that many entries qualify. Consequently, a high selectivity means, that only few entries qualify) queries, low selectivity queries, varying selectivity, and multiple query access patterns.
Finally, we compare cracking against different sorting algorithms as well as against different main memory optimized indexes, including the recently proposed adaptive radix tree (ART). Our results show that: (1) the previously proposed cracking algorithms are repeatable, (2) there is still enough room to significantly improve the previously proposed cracking algorithms, (3) parallelizing cracking algorithms efficiently is a hard task, (4) cracking depends heavily on query selectivity, (5) cracking needs to catch up with modern indexing trends, and (6) different indexing algorithms have different indexing signatures.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Jugel:2016:VAV, author = "Uwe Jugel and Zbigniew Jerzak and Gregor Hackenbroich and Volker Markl", title = "{VDDA}: automatic visualization-driven data aggregation in relational databases", journal = j-VLDB-J, volume = "25", number = "1", pages = "53--77", month = feb, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0396-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jan 21 17:41:55 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Contemporary RDBMS-based systems for visualization of high-volume numerical data have difficulty to cope with the hard latency requirements and high ingestion rates of interactive visualizations. Existing solutions for lowering the volume of large data sets disregard the spatial properties of visualizations, resulting in visualization errors. In this work, we introduce VDDA, a visualization-driven data aggregation that models visual aggregation at the pixel level as data aggregation at the query level. 
Based on the M4 aggregation for producing pixel-perfect line charts from highly reduced data subsets, we define a complete set of data reduction operators that simulate the overplotting behavior of the most frequently used chart types. Relying only on the relational algebra and the common data aggregation functions, our approach is generic and applicable to any visualization system that consumes data stored in relational databases. We demonstrate our visualization-driven data aggregation using real-world data sets from high-tech manufacturing, stock markets, and sports analytics, reducing data volumes by up to two orders of magnitude, while preserving pixel-perfect visualizations, as producible from the raw data.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2016:EDL, author = "Wei Wang and Xiaoyan Yang and Beng Chin Ooi and Dongxiang Zhang and Yueting Zhuang", title = "Effective deep learning-based multi-modal retrieval", journal = j-VLDB-J, volume = "25", number = "1", pages = "79--101", month = feb, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0391-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jan 21 17:41:55 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Multi-modal retrieval is emerging as a new search paradigm that enables seamless information retrieval from various types of media. For example, users can simply snap a movie poster to search for relevant reviews and trailers. The mainstream solution to the problem is to learn a set of mapping functions that project data from different modalities into a common metric space in which conventional indexing schemes for high-dimensional space can be applied. 
Since the effectiveness of the mapping functions plays an essential role in improving search quality, in this paper, we exploit deep learning techniques to learn effective mapping functions. In particular, we first propose a general learning objective that effectively captures both intramodal and intermodal semantic relationships of data from heterogeneous sources. Given the general objective, we propose two learning algorithms to realize it: (1) an unsupervised approach that uses stacked auto-encoders and requires minimum prior knowledge on the training data and (2) a supervised approach using deep convolutional neural network and neural language model. Our training algorithms are memory efficient with respect to the data volume. Given a large training dataset, we split it into mini-batches and adjust the mapping functions continuously for each batch. Experimental results on three real datasets demonstrate that our proposed methods achieve significant improvement in search accuracy over the state-of-the-art solutions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Funke:2016:KPC, author = "Stefan Funke and Andr{\'e} Nusser and Sabine Storandt", title = "On {$k$-Path Covers} and their applications", journal = j-VLDB-J, volume = "25", number = "1", pages = "103--123", month = feb, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0392-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jan 21 17:41:55 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "For a directed graph G with vertex set V, we call a subset $ C \subseteq V $ a $k$-(All-)Path Cover if $C$ contains a node from any simple path in $G$ consisting of $k$ nodes. 
This paper considers the problem of constructing small $k$-Path Covers in the context of road networks with millions of nodes and edges. In many application scenarios, the set C and its induced overlay graph constitute a very compact synopsis of $G$, which is the basis for the currently fastest data structure for personalized shortest path queries, visually pleasing overlays of subsampled paths, and efficient reporting, retrieval and aggregation of associated data in spatial network databases. Apart from a theoretic investigation of the problem, we provide efficient algorithms that produce very small $k$-Path Covers for large real-world road networks (with a posteriori guarantees via instance-based lower bounds). We also apply our algorithms to other (social, collaboration, web, etc.) networks and can improve in several instances upon previous approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Quamar:2016:NNC, author = "Abdul Quamar and Amol Deshpande and Jimmy Lin", title = "{NScale}: neighborhood-centric large-scale graph analytics in the cloud", journal = j-VLDB-J, volume = "25", number = "2", pages = "125--150", month = apr, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0405-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 25 16:34:05 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "There is an increasing interest in executing complex analyses over large graphs, many of which require processing a large number of multi-hop neighborhoods or subgraphs. Examples include ego network analysis, motif counting, finding social circles, personalized recommendations, link prediction, anomaly detection, analyzing influence cascades, and others. 
These tasks are not well served by existing vertex-centric graph processing frameworks, where user programs are only able to directly access the state of a single vertex at a time, resulting in high communication, scheduling, and memory overheads in executing such tasks. Further, most existing graph processing frameworks ignore the challenges in extracting the relevant portions of the graph that an analysis task is interested in, and loading those onto distributed memory. This paper introduces NScale, a novel end-to-end graph processing framework that enables the distributed execution of complex subgraph-centric analytics over large-scale graphs in the cloud. NScale enables users to write programs at the level of subgraphs rather than at the level of vertices. Unlike most previous graph processing frameworks, which apply the user program to the entire graph, NScale allows users to declaratively specify subgraphs of interest. Our framework includes a novel graph extraction and packing (GEP) module that utilizes a cost-based optimizer to partition and pack the subgraphs of interest into memory on as few machines as possible. The distributed execution engine then takes over and runs the user program in parallel on those subgraphs, restricting the scope of the execution appropriately, and utilizes novel techniques to minimize memory consumption by exploiting overlaps among the subgraphs. We present a comprehensive empirical evaluation comparing against three state-of-the-art systems, namely Giraph, GraphLab, and GraphX, on several real-world datasets and a variety of analysis tasks. 
Our experimental results show orders-of-magnitude improvements in performance and drastic reductions in the cost of analytics compared to vertex-centric approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Soule:2016:RAS, author = "Robert Soul{\'e} and Bugra Gedik", title = "{RailwayDB}: adaptive storage of interaction graphs", journal = j-VLDB-J, volume = "25", number = "2", pages = "151--169", month = apr, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0407-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 25 16:34:05 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We are living in an ever more connected world, where data recording the interactions between people, software systems, and the physical world is becoming increasingly prevalent. These data often take the form of a temporally evolving graph, where entities are the vertices and the interactions between them are the edges. We call such graphs interaction graphs. Various domains, including telecommunications, transportation, and social media, depend on analytics performed on interaction graphs. The ability to efficiently support historical analysis over interaction graphs requires effective solutions for the problem of data layout on disk. This paper presents an adaptive disk layout called the railway layout for optimizing disk block storage for interaction graphs. The key idea is to divide blocks into one or more sub-blocks. Each sub-block contains the entire graph structure, but only a subset of the attributes. This improves query I/O, at the cost of increased storage overhead. 
We introduce optimal integer linear program (ILP) formulations for partitioning disk blocks into sub-blocks with overlapping and nonoverlapping attributes. Additionally, we present greedy heuristics that can scale better compared to the ILP alternatives, yet achieve close to optimal query I/O. We provide an implementation of the railway layout as part of RailwayDB---an open-source graph database we have developed. To demonstrate the benefits of the railway layout, we provide an extensive experimental evaluation, including model-based as well as empirical results comparing our approach to baseline alternatives.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yuan:2016:DTK, author = "Long Yuan and Lu Qin and Xuemin Lin and Lijun Chang and Wenjie Zhang", title = "Diversified top-$k$ clique search", journal = j-VLDB-J, volume = "25", number = "2", pages = "171--196", month = apr, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0408-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 25 16:34:05 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Maximal clique enumeration is a fundamental problem in graph theory and has been extensively studied. However, maximal clique enumeration is time-consuming in large graphs and always returns enormous cliques with large overlaps. Motivated by this, in this paper, we study the diversified top-k clique search problem which is to find top-k cliques that can cover most number of nodes in the graph. Diversified top-k clique search can be widely used in a lot of applications including community search, motif discovery, and anomaly detection in large graphs. 
A naive solution for diversified top-k clique search is to keep all maximal cliques in memory and then find k of them that cover most nodes in the graph by using the approximate greedy max k-cover algorithm. However, such a solution is impractical when the graph is large. In this paper, instead of keeping all maximal cliques in memory, we devise an algorithm to maintain k candidates in the process of maximal clique enumeration. Our algorithm has limited memory footprint and can achieve a guaranteed approximation ratio. We also introduce a novel light-weight $ \mathsf {PNP} $-$ \mathsf {Index} $, based on which we design an optimal maximal clique maintenance algorithm. We further explore three optimization strategies to avoid enumerating all maximal cliques and thus largely reduce the computational cost. Besides, for the massive input graph, we develop an I/O efficient algorithm to tackle the problem when the input graph cannot fit in main memory. We conduct extensive performance studies on real graphs and synthetic graphs. One of the real graphs contains 1.02 billion edges. The results demonstrate the high efficiency and effectiveness of our approach.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Pham:2016:ACW, author = "Thao N. Pham and Panos K. 
Chrysanthis and Alexandros Labrinidis", title = "Avoiding class warfare: managing continuous queries with differentiated classes of service", journal = j-VLDB-J, volume = "25", number = "2", pages = "197--221", month = apr, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0411-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 25 16:34:05 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Data stream management systems (DSMSs) offer the most effective solution for processing data streams by efficiently executing continuous queries (CQs) over the incoming data. CQs inherently have different levels of criticality and hence different levels of expected quality of service (QoS) and quality of data (QoD). Adhering to such expected QoS/QoD metrics is even more important in cases of multi-tenant data stream management services. In this work, we propose DILoS, a framework that, through priority-based scheduling and load shedding, supports differentiated QoS and QoD for multiple classes of CQs. Unlike existing works that consider scheduling and load shedding separately, DILoS is a novel unified framework that exploits the synergy between scheduling and load shedding. We also propose ALoMa, a general, adaptive load manager that DILoS is built upon. By its design, ALoMa performs better than the state-of-the-art alternatives in three dimensions: (1) it automatically tunes the headroom factor, (2) it honors the delay target, (3) it is applicable to complex query networks with shared operators. We implemented DILoS and ALoMa in our real DSMS prototype system (AQSIOS) and evaluate their performance for a variety of real and synthetic workloads. Our experimental evaluation of ALoMa verified its clear superiority over the state-of-the-art approaches. 
Our experimental evaluation of the DILoS framework showed that it (a) allows the scheduler and load shedder to consistently honor CQs' priorities, (b) significantly increases system capacity utilization by exploiting batch processing, and (c) enables operator sharing among query classes of different priorities while avoiding priority inversion, i.e., a lower-priority class never blocks a higher-priority one.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Langer:2016:EOD, author = "Philipp Langer and Felix Naumann", title = "Efficient order dependency detection", journal = j-VLDB-J, volume = "25", number = "2", pages = "223--241", month = apr, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0412-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 25 16:34:05 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Order dependencies (ODs) describe a relationship of order between lists of attributes in a relational table. ODs can help to understand the semantics of datasets and the applications producing them. They have applications in the field of query optimization by suggesting query rewrites. Also, the existence of an OD in a table can provide hints on which integrity constraints are valid for the domain of the data at hand. This work is the first to describe the discovery problem for order dependencies in a principled manner by characterizing the search space, developing and proving pruning rules, and presenting the algorithm Order, which finds all order dependencies in a given table. Order traverses the lattice of permutations of attributes in a level-wise bottom-up manner. 
In a comprehensive evaluation, we show that it is efficient even for various large datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Peng:2016:PSQ, author = "Peng Peng and Lei Zou and M. Tamer {\"O}zsu and Lei Chen and Dongyan Zhao", title = "Processing {SPARQL} queries over distributed {RDF} graphs", journal = j-VLDB-J, volume = "25", number = "2", pages = "243--268", month = apr, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0415-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 25 16:34:05 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We propose techniques for processing SPARQL queries over a large RDF graph in a distributed environment. We adopt a ``partial evaluation and assembly'' framework. Answering a SPARQL query Q is equivalent to finding subgraph matches of the query graph Q over RDF graph G. Based on properties of subgraph matching over a distributed graph, we introduce local partial match as partial answers in each fragment of RDF graph G. For assembly, we propose two methods: centralized and distributed assembly. We analyze our algorithms both theoretically and experimentally. 
Extensive experiments over both real and benchmark RDF repositories of billions of triples confirm that our method is superior to the state-of-the-art methods in both the system's performance and scalability.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gao:2016:TCP, author = "Jun Gao and Chang Zhou and Jeffrey Xu Yu", title = "Toward continuous pattern detection over evolving large graph with snapshot isolation", journal = j-VLDB-J, volume = "25", number = "2", pages = "269--290", month = apr, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0416-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Mar 25 16:34:05 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper studies continuous pattern detection over large evolving graphs, which plays an important role in monitoring-related applications. The problem is challenging due to the large size and dynamic updates of graphs, the massive search space of pattern detection and inconsistent query results on dynamic graphs. This paper first introduces a snapshot isolation requirement, which ensures that the query results come from a consistent graph snapshot instead of a mixture of partial evolving graphs. Second, we propose an SSD (single sink directed acyclic graph) plan friendly to vertex-centric-distributed graph processing frameworks. SSD plan can guide the message transformation and transfer among graph vertices, and determine the satisfaction of the pattern on graph vertices for the sink vertex. 
Third, we devise strategies for major steps in the SSD evaluation, including the location of valid messages to achieve snapshot isolation, AO-List to determine the satisfaction of transition rule over dynamic graph, and message-on-change policy to reduce outgoing messages. The experiments on billion-edge graphs using Giraph, an open source implementation of Pregel, illustrate the efficiency and effectiveness of our method.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Baumann:2016:BDC, author = "Stephan Baumann and Peter Boncz and Kai-Uwe Sattler", title = "Bitwise dimensional co-clustering for analytical workloads", journal = j-VLDB-J, volume = "25", number = "3", pages = "291--316", month = jun, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0417-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue May 24 16:31:54 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Analytical workloads in data warehouses often include heavy joins where queries involve multiple fact tables in addition to the typical star-patterns, dimensional grouping and selections. In this paper we propose a new processing and storage framework called bitwise dimensional co-clustering (BDCC) that avoids replication and thus keeps updates fast, yet is able to accelerate all these foreign key joins, efficiently support grouping and pushes down most dimensional selections. The core idea of BDCC is to cluster each table on a mix of dimensions, each possibly derived from attributes imported over an incoming foreign key and this way creating foreign key connected tables with partially shared clusterings. 
These are later used to accelerate any join between two tables that have some dimension in common and additionally permit to push down and propagate selections (reduce I/O) and accelerate aggregation and ordering operations. Besides the general framework, we describe an algorithm to derive such a physical co-clustering database automatically and describe query processing and query optimization techniques that can easily be fitted into existing relational engines. We present an experimental evaluation on the TPC-H benchmark in the Vectorwise system, showing that co-clustering can significantly enhance its already high performance and at the same time significantly reduce the memory consumption of the system.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2016:EAF, author = "Feifei Li and Ke Yi and Yufei Tao and Bin Yao and Yang Li and Dong Xie and Min Wang", title = "Exact and approximate flexible aggregate similarity search", journal = j-VLDB-J, volume = "25", number = "3", pages = "317--338", month = jun, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0418-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue May 24 16:31:54 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Aggregate similarity search, also known as aggregate nearest-neighbor (Ann) query, finds many useful applications in spatial and multimedia databases. Given a group Q of M query objects, it retrieves from a database the objects most similar to Q, where the similarity is an aggregation (e.g., $ \mathrm {sum} $, $ \max $) of the distances between each retrieved object p and all the objects in Q. 
In this paper, we propose an added flexibility to the query definition, where the similarity is an aggregation over the distances between p and any subset of $ \phi M $ objects in Q for some $ 0 < \phi \le 1 $. We call this new definition flexible aggregate similarity search and accordingly refer to a query as a flexible aggregate nearest-neighbor (Fann) query. We present algorithms for answering Fann queries exactly and approximately. Our approximation algorithms are especially appealing, which are simple, highly efficient, and work well in both low and high dimensions. They also return near-optimal answers with guaranteed constant-factor approximations in any dimensions. Extensive experiments on large real and synthetic datasets from 2 to 74 dimensions have demonstrated their superior efficiency and high quality.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Guzun:2016:HQO, author = "Gheorghi Guzun and Guadalupe Canahuate", title = "Hybrid query optimization for hard-to-compress bit-vectors", journal = j-VLDB-J, volume = "25", number = "3", pages = "339--354", month = jun, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0419-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue May 24 16:31:54 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Bit-vectors are widely used for indexing and summarizing data due to their efficient processing in modern computers. Sparse bit-vectors can be further compressed to reduce their space requirement. Special compression schemes based on run-length encoders have been designed to avoid explicit decompression and minimize the decoding overhead during query execution. 
Moreover, highly compressed bit-vectors can exhibit a faster query time than the non-compressed ones. However, for hard-to-compress bit-vectors, compression does not speed up queries and can add considerable overhead. In these cases, bit-vectors are often stored verbatim (non-compressed). On the other hand, queries are answered by executing a cascade of bit-wise operations involving indexed bit-vectors and intermediate results. Often, even when the original bit-vectors are hard to compress, the intermediate results become sparse. It could be feasible to improve query performance by compressing these bit-vectors as the query is executed. In this scenario, it would be necessary to operate verbatim and compressed bit-vectors together. In this paper, we propose a hybrid framework where compressed and verbatim bitmaps can coexist and design algorithms to execute queries under this hybrid model. Our query optimizer is able to decide at run time when to compress or decompress a bit-vector. Our heuristics show that the applications using higher-density bitmaps can benefit from using this hybrid model, improving both their query time and memory utilization.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Harbi:2016:ASQ, author = "Razen Harbi and Ibrahim Abdelaziz and Panos Kalnis and Nikos Mamoulis and Yasser Ebrahim and Majed Sahli", title = "Accelerating {SPARQL} queries by exploiting hash-based locality and adaptive partitioning", journal = j-VLDB-J, volume = "25", number = "3", pages = "355--380", month = jun, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0420-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue May 24 16:31:54 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "State-of-the-art 
distributed RDF systems partition data across multiple computer nodes (workers). Some systems perform cheap hash partitioning, which may result in expensive query evaluation. Others try to minimize inter-node communication, which requires an expensive data preprocessing phase, leading to a high startup cost. Apriori knowledge of the query workload has also been used to create partitions, which, however, are static and do not adapt to workload changes. In this paper, we propose AdPart, a distributed RDF system, which addresses the shortcomings of previous work. First, AdPart applies lightweight partitioning on the initial data, which distributes triples by hashing on their subjects; this renders its startup overhead low. At the same time, the locality-aware query optimizer of AdPart takes full advantage of the partitioning to (1) support the fully parallel processing of join patterns on subjects and (2) minimize data communication for general queries by applying hash distribution of intermediate results instead of broadcasting, wherever possible. Second, AdPart monitors the data access patterns and dynamically redistributes and replicates the instances of the most frequent ones among workers. As a result, the communication cost for future queries is drastically reduced or even eliminated. To control replication, AdPart implements an eviction policy for the redistributed patterns. 
Our experiments with synthetic and real data verify that AdPart: (1) starts faster than all existing systems; (2) processes thousands of queries before other systems become online; and (3) gracefully adapts to the query load, being able to evaluate queries on billion-scale RDF data in subseconds.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bonifati:2016:MEO, author = "Angela Bonifati and Werner Nutt and Riccardo Torlone and Jan {Van Den Bussche}", title = "Mapping-equivalence and oid-equivalence of single-function object-creating conjunctive queries", journal = j-VLDB-J, volume = "25", number = "3", pages = "381--397", month = jun, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0421-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue May 24 16:31:54 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Conjunctive database queries have been extended with a mechanism for object creation to capture important applications such as data exchange, data integration, and ontology-based data access. Object creation generates new object identifiers in the result that do not belong to the set of constants in the source database. The new object identifiers can be also seen as Skolem terms. Hence, object-creating conjunctive queries can also be regarded as restricted second-order tuple-generating dependencies (SO-tgds), considered in the data exchange literature. In this paper, we focus on the class of single-function object-creating conjunctive queries, or sifo CQs for short. The single-function symbol can be used only once in the head of the query. 
We give a new characterization for oid-equivalence of sifo CQs that is simpler than the one given by Hull and Yoshikawa and places the problem in the complexity class NP. Our characterization is based on Cohen's equivalence notions for conjunctive queries with multiplicities. We also solve the logical entailment problem for sifo CQs, showing that also this problem belongs to NP. Results by Pichler et al. have shown that logical equivalence for more general classes of SO-tgds is either undecidable or decidable with as yet unknown complexity upper bounds.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lu:2016:DCE, author = "Yue Lu and Yuguan Li and Mohamed Y. Eltabakh", title = "Decorating the cloud: enabling annotation management in {MapReduce}", journal = j-VLDB-J, volume = "25", number = "3", pages = "399--424", month = jun, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0422-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue May 24 16:31:54 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Data curation and annotation are indispensable mechanisms to a wide range of applications for capturing various types of metadata information. This metadata not only increases the data's credibility and merit, and allows end users and applications to make more informed decisions, but also enables advanced processing over the data that is not feasible otherwise. That is why annotation management has been extensively studied in the context of scientific repositories, web documents, and relational database systems. 
In this paper, we make the case that cloud-based applications that rely on the emerging Hadoop infrastructure are also in need for data curation and annotation and that the presence of such mechanisms in Hadoop would bring value-added capabilities to these applications. We propose the ``CloudNotes'' system, a full-fledged MapReduce-based annotation management engine. CloudNotes addresses several new challenges to annotation management including: (1) scalable and distributed processing of annotations over large clusters, (2) propagation of annotations under the MapReduce's blackbox execution model, and (3) annotation-driven optimizations ranging from proactive prefetching and colocation of annotations, annotation-aware task scheduling, novel shared execution strategies among the annotation jobs, and concurrency control mechanisms for annotation management. These challenges have not been addressed or explored before by the state-of-art technologies. CloudNotes is built on top of the open-source Hadoop/HDFS infrastructure and experimentally evaluated to demonstrate the practicality and scalability of its features, and the effectiveness of its optimizations under large workloads.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sariyuce:2016:IKC, author = "Ahmet Erdem Sariy{\"u}ce and Bugra Gedik and Gabriela Jacques-Silva and Kun-Lung Wu and {\"U}mit V. 
{\c{C}}ataly{\"u}rek", title = "Incremental $k$-core decomposition: algorithms and evaluation", journal = j-VLDB-J, volume = "25", number = "3", pages = "425--447", month = jun, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0423-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue May 24 16:31:54 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A $k$-core of a graph is a maximal connected subgraph in which every vertex is connected to at least k vertices in the subgraph. $k$-core decomposition is often used in large-scale network analysis, such as community detection, protein function prediction, visualization, and solving NP-hard problems on real networks efficiently, like maximal clique finding. In many real-world applications, networks change over time. As a result, it is essential to develop efficient incremental algorithms for dynamic graph data. In this paper, we propose a suite of incremental $k$-core decomposition algorithms for dynamic graph data. These algorithms locate a small subgraph that is guaranteed to contain the list of vertices whose maximum $k$-core values have changed and efficiently process this subgraph to update the $k$-core decomposition. We present incremental algorithms for both insertion and deletion operations, and propose auxiliary vertex state maintenance techniques that can further accelerate these operations. Our results show a significant reduction in runtime compared to non-incremental alternatives. We illustrate the efficiency of our algorithms on different types of real and synthetic graphs, at varying scales. 
For a graph of 16 million vertices, we observe relative throughputs reaching a million times, relative to the non-incremental algorithms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Luo:2016:QDS, author = "Ge Luo and Lu Wang and Ke Yi and Graham Cormode", title = "Quantiles over data streams: experimental comparisons, new analyses, and further improvements", journal = j-VLDB-J, volume = "25", number = "4", pages = "449--472", month = aug, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0424-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 21 06:41:51 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A fundamental problem in data management and analysis is to generate descriptions of the distribution of data. It is most common to give such descriptions in terms of the cumulative distribution, which is characterized by the quantiles of the data. The design and engineering of efficient methods to find these quantiles has attracted much study, especially in the case where the data are given incrementally, and we must compute the quantiles in an online, streaming fashion. While such algorithms have proved to be extremely useful in practice, there has been limited formal comparison of the competing methods, and no comprehensive study of their performance. In this paper, we remedy this deficit by providing a taxonomy of different methods and describe efficient implementations. In doing so, we propose new variants that have not been studied before, yet which outperform existing methods. 
To illustrate this, we provide detailed experimental comparisons demonstrating the trade-offs between space, time, and accuracy for quantile computation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xie:2016:EEI, author = "Xike Xie and Benjin Mei and Jinchuan Chen and Xiaoyong Du and Christian S. Jensen", title = "{Elite}: an elastic infrastructure for big spatiotemporal trajectories", journal = j-VLDB-J, volume = "25", number = "4", pages = "473--493", month = aug, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0425-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 21 06:41:51 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "As the volumes of spatiotemporal trajectory data continue to grow at a rapid pace, a new generation of data management techniques is needed in order to be able to utilize these data to provide a range of data-driven services, including geographic-type services. Key challenges posed by spatiotemporal data include the massive data volumes, the high velocity with which the data are captured, the need for interactive response times, and the inherent inaccuracy of the data. We propose an infrastructure, Elite, that leverages peer-to-peer and parallel computing techniques to address these challenges. The infrastructure offers efficient, parallel update and query processing by organizing the data into a layered index structure that is logically centralized, but physically distributed among computing nodes. The infrastructure is elastic with respect to storage, meaning that it adapts to fluctuations in the storage volume, and with respect to computation, meaning that the degree of parallelism can be adapted to best match the computational requirements. 
Further, the infrastructure offers advanced functionality, including probabilistic simulations, for contending with the inaccuracy of the underlying data in query processing. Extensive empirical studies offer insight into properties of the infrastructure and indicate that it meets its design goals, thus enabling the effective management of big spatiotemporal data.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kanza:2016:ESF, author = "Yaron Kanza and Hadas Yaari", title = "External sorting on flash storage: reducing cell wearing and increasing efficiency by avoiding intermediate writes", journal = j-VLDB-J, volume = "25", number = "4", pages = "495--518", month = aug, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0426-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 21 06:41:51 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper studies the problem of how to conduct external sorting on flash drives while avoiding intermediate writes to the disk. The focus is on sort in portable electronic devices, where relations are only larger than the main memory by a small factor, and on sort as part of distributed processes where relations are frequently partially sorted. In such cases, sort algorithms that refrain from writing intermediate results to the disk have three advantages over algorithms that perform intermediate writes. First, on devices in which read operations are much faster than writes, such methods are efficient and frequently outperform Merge Sort. Secondly, they reduce flash cell degradation caused by writes. Thirdly, they can be used in cases where there is not enough disk space for the intermediate results. 
Novel sort algorithms that avoid intermediate writes to the disk are presented. An experimental evaluation, on different flash storage devices, shows that in many cases the new algorithms can extend the lifespan of the devices by avoiding unnecessary writes to the disk, while maintaining efficiency, in comparison with Merge Sort.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Jeon:2016:MBS, author = "Inah Jeon and Evangelos E. Papalexakis and Christos Faloutsos and Lee Sael and U. Kang", title = "Mining billion-scale tensors: algorithms and discoveries", journal = j-VLDB-J, volume = "25", number = "4", pages = "519--544", month = aug, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0427-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 21 06:45:26 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "How can we analyze large-scale real-world data with various attributes? Many real-world data (e.g., network traffic logs, web data, social networks, knowledge bases, and sensor streams) with multiple attributes are represented as multi-dimensional arrays, called tensors. For analyzing a tensor, tensor decompositions are widely used in many data mining applications: detecting malicious attackers in network traffic logs (with source IP, destination IP, port-number, timestamp), finding telemarketers in a phone call history (with sender, receiver, date), and identifying interesting concepts in a knowledge base (with subject, object, relation). However, current tensor decomposition methods do not scale to large and sparse real-world tensors with millions of rows and columns and `fibers.' 
In this paper, we propose HaTen2, a distributed method for large-scale tensor decompositions that runs on the MapReduce framework. Our careful design and implementation of HaTen2 dramatically reduce the size of intermediate data and the number of jobs leading to achieve high scalability compared with the state-of-the-art method. Thanks to HaTen2, we analyze big real-world sparse tensors that cannot be handled by the current state of the art, and discover hidden concepts.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Islam:2016:KYC, author = "Md. Saiful Islam and Chengfei Liu", title = "Know your customer: computing $k$-most promising products for targeted marketing", journal = j-VLDB-J, volume = "25", number = "4", pages = "545--570", month = aug, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0428-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 21 06:45:26 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The advancement of World Wide Web has revolutionized the way the manufacturers can do business. The manufacturers can collect customer preferences for products and product features from their sales and other product-related Web sites to enter and sustain in the global market. For example, the manufacturers can make intelligent use of these customer preference data to decide on which products should be selected for targeted marketing. However, the selected products must attract as many customers as possible to increase the possibility of selling more than their respective competitors. This paper addresses this kind of product selection problem.
That is, given a database of existing products P from the competitors, a set of company's own products Q, a dataset C of customer preferences and a positive integer k, we want to find k-most promising products (k-MPP) from Q with maximum expected number of total customers for targeted marketing. We model k-MPP query and propose an algorithmic framework for processing such query and its variants. Our framework utilizes grid-based data partitioning scheme and parallel computing techniques to realize k-MPP query. The effectiveness and efficiency of the framework are demonstrated by conducting extensive experiments with real and synthetic datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kohler:2016:PCK, author = "Henning K{\"o}hler and Uwe Leck and Sebastian Link and Xiaofang Zhou", title = "Possible and certain keys for {SQL}", journal = j-VLDB-J, volume = "25", number = "4", pages = "571--596", month = aug, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0430-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 21 06:45:26 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Driven by the dominance of the relational model and the requirements of modern applications, we revisit the fundamental notion of a key in relational databases with NULL. In SQL, primary key columns are NOT NULL, and UNIQUE constraints guarantee uniqueness only for tuples without NULL. We investigate the notions of possible and certain keys, which are keys that hold in some or all possible worlds that originate from an SQL table, respectively. Possible keys coincide with UNIQUE, thus providing a semantics for their syntactic definition in the SQL standard. 
Certain keys extend primary keys to include NULL columns and can uniquely identify entities whenever feasible, while primary keys may not. In addition to basic characterization, axiomatization, discovery, and extremal combinatorics problems, we investigate the existence and construction of Armstrong tables, and describe an indexing scheme for enforcing certain keys. Our experiments show that certain keys with NULLs occur in real-world data, and related computational problems can be solved efficiently. Certain keys are therefore semantically well founded and able to meet Codd's entity integrity rule while handling high volumes of incomplete data from different formats.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mottin:2016:HPA, author = "Davide Mottin and Alice Marascu and Senjuti Basu Roy and Gautam Das and Themis Palpanas and Yannis Velegrakis", title = "A holistic and principled approach for the empty-answer problem", journal = j-VLDB-J, volume = "25", number = "4", pages = "597--622", month = aug, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0431-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 21 06:45:26 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We propose a principled optimization-based interactive query relaxation framework for queries that return no answers. Given an initial query that returns an empty-answer set, our framework dynamically computes and suggests alternative queries with fewer conditions than those the user has initially requested, in order to help the user arrive at a query with a non-empty-answer, or at a query for which no matter how many additional conditions are ignored, the answer will still be empty. 
Our proposed approach for suggesting query relaxations is driven by a novel probabilistic framework based on optimizing a wide variety of application-dependent objective functions. We describe optimal and approximate solutions of different optimization problems using the framework. Moreover, we discuss two important extensions to the base framework: the specification of a minimum size on the number of results returned by a relaxed query and the possibility of proposing multiple conditions at the same time. We analyze the proposed solutions, experimentally verify their efficiency and effectiveness, and illustrate their advantages over the existing approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Boncz:2016:SIM, author = "Peter Boncz and Wolfgang Lehner and Thomas Neumann", title = "Special Issue: Modern Hardware", journal = j-VLDB-J, volume = "25", number = "5", pages = "623--624", month = oct, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0440-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Sep 12 18:50:32 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Porobic:2016:CIH, author = "Danica Porobic and Ippokratis Pandis and Miguel Branco and Pinar T{\"o}z{\"u}n and Anastasia Ailamaki", title = "Characterization of the Impact of Hardware Islands on {OLTP}", journal = j-VLDB-J, volume = "25", number = "5", pages = "625--650", month = oct, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0413-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", 
bibdate = "Mon Sep 12 18:50:32 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Modern hardware is abundantly parallel and increasingly heterogeneous. The numerous processing cores have non-uniform access latencies to the main memory and processor caches, which causes variability in the communication costs. Unfortunately, database systems mostly assume that all processing cores are the same and that microarchitecture differences are not significant enough to appear in critical database execution paths. As we demonstrate in this paper, however, non-uniform core topology does appear in the critical path and conventional database architectures achieve suboptimal and even worse, unpredictable performance. We perform a detailed performance analysis of OLTP deployments in servers with multiple cores per CPU (multicore) and multiple CPUs per server (multisocket). We compare different database deployment strategies where we vary the number and size of independent database instances running on a single server, from a single shared-everything instance to fine-grained shared-nothing configurations. We quantify the impact of non-uniform hardware on various deployments by (a) examining how efficiently each deployment uses the available hardware resources and (b) measuring the impact of distributed transactions and skewed requests on different workloads. We show that no strategy is optimal for all cases and that the best choice depends on the combination of hardware topology and workload characteristics. Finally, we argue that transaction processing systems must be aware of the hardware topology in order to achieve predictably high performance.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sadoghi:2016:ESO, author = "Mohammad Sadoghi and Kenneth A. 
Ross and Mustafa Canim and Bishwaranjan Bhattacharjee", title = "Exploiting {SSDs} in operational multiversion databases", journal = j-VLDB-J, volume = "25", number = "5", pages = "651--672", month = oct, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0410-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Sep 12 18:50:32 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Multiversion databases store both current and historical data. Rows are typically annotated with timestamps representing the period when the row is/was valid. We develop novel techniques to reduce index maintenance in multiversion databases, so that indexes can be used effectively for analytical queries over current data without being a heavy burden on transaction throughput. To achieve this end, we re-design persistent index data structures in the storage hierarchy to employ an extra level of indirection. The indirection level is stored on solid-state disks that can support very fast random I/Os, so that traversing the extra level of indirection incurs a relatively small overhead. The extra level of indirection dramatically reduces the number of magnetic disk I/Os that are needed for index updates and localizes maintenance to indexes on updated attributes. Additionally, we batch insertions within the indirection layer in order to reduce physical disk I/Os for indexing new records. In this work, we further exploit SSDs by introducing novel DeltaBlock techniques for storing the recent changes to data on SSDs. Using our DeltaBlock, we propose an efficient method to periodically flush the recently changed data from SSDs to HDDs such that, on the one hand, we keep track of every change (or delta) for every record, and, on the other hand, we avoid redundantly storing the unchanged portion of updated records. 
By reducing the index maintenance overhead on transactions, we enable operational data stores to create more indexes to support queries. We have developed a prototype of our indirection proposal by extending the widely used generalized search tree open-source project, which is also employed in PostgreSQL. Our working implementation demonstrates that we can significantly reduce index maintenance and/or query processing cost by a factor of 3. For the insertion of new records, our novel batching technique can save up to 90 \% of the insertion time. For updates, our prototype demonstrates that we can significantly reduce the database size by up to 80 \% even with a modest space allocated for DeltaBlocks on SSDs.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kang:2016:FCE, author = "Woon-Hak Kang and Sang-Won Lee and Bongki Moon", title = "Flash as cache extension for online transactional workloads", journal = j-VLDB-J, volume = "25", number = "5", pages = "673--694", month = oct, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0414-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Sep 12 18:50:32 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Considering the current price gap between hard disk and flash memory SSD storages, for applications dealing with large-scale data, it will be economically more sensible to use flash memory drives to supplement disk drives rather than to replace them. This paper presents FaCE, which is a new low-overhead caching strategy that uses flash memory as an extension to the RAM buffer of database systems. FaCE aims at improving the transaction throughput as well as shortening the recovery time from a system failure. 
To achieve the goals, we propose two novel algorithms for flash cache management, namely multi-version FIFO replacement and group second chance. This was possible due to flash write optimization as well as disk access reduction obtained by the FaCE caching methods. In addition, FaCE takes advantage of the nonvolatility of flash memory to fully support database recovery by extending the scope of a persistent database to include the data pages stored in the flash cache. We have implemented FaCE in the PostgreSQL open-source database server and demonstrated its effectiveness for TPC-C benchmarks in comparison with existing caching methods such as Lazy Cleaning and Linux Bcache.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Jin:2016:RWO, author = "Peiquan Jin and Chengcheng Yang and Christian S. Jensen and Puyuan Yang and Lihua Yue", title = "Read\slash write-optimized tree indexing for solid-state drives", journal = j-VLDB-J, volume = "25", number = "5", pages = "695--717", month = oct, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0406-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Sep 12 18:50:32 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Flash-memory-based solid-state drives (SSDs) are used widely for secondary storage. To be effective for SSDs, traditional indices have to be redesigned to cope with the special properties of flash memory, such as asymmetric read/write latencies (fast reads and slow writes) and out-of-place updates. Previous flash-optimized indices focus mainly on reducing random writes to SSDs, which is typically accomplished at the expense of a substantial number of extra reads. 
However, modern SSDs show a narrowing gap between read and write speeds, and read operations on SSDs increasingly affect the overall performance of indices on SSDs. As a consequence, how to optimize SSD-aware indices by reducing both write and read costs is a pertinent and open challenge. We propose a new tree index for SSDs that is able to reduce both writes and extra reads. In particular, we use an update buffer and overflow pages to reduce random writes, and we further exploit Bloom filters to reduce the extra reads to the overflow nodes in the tree. With this mechanism, we construct a read/write-optimized index that is capable of offering better overall performance than previous flash-aware indices. In addition, we present an analysis of the proposed index and show that the read and write costs of the operations on the index can be balanced by only tuning the false-positive rate of the Bloom filters. Our experimental results suggest that our proposal is efficient and represents an improvement over existing methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sitaridi:2016:GAS, author = "Evangelia A. Sitaridi and Kenneth A. Ross", title = "{GPU}-accelerated string matching for database applications", journal = j-VLDB-J, volume = "25", number = "5", pages = "719--740", month = oct, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-015-0409-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Sep 12 18:50:32 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Implementations of relational operators on GPU processors have resulted in order of magnitude speedups compared to their multicore CPU counterparts. 
Here we focus on the efficient implementation of string matching operators common in SQL queries. Due to different architectural features the optimal algorithm for CPUs might be suboptimal for GPUs. GPUs achieve high memory bandwidth by running thousands of threads, so it is not feasible to keep the working set of all threads in the cache in a naive implementation. In GPUs the unit of execution is a group of threads and in the presence of loops and branches, threads in a group have to follow the same execution path; if some threads diverge, then different paths are serialized. We study the cache memory efficiency of single- and multi-pattern string matching algorithms for conventional and pivoted string layouts in the GPU memory. We evaluate the memory efficiency in terms of memory access pattern and achieved memory bandwidth for different parallelization methods. To reduce thread divergence, we split string matching into multiple steps. We evaluate the different matching algorithms in terms of average- and worst-case performance and compare them against state-of-the-art CPU and GPU libraries. Our experimental evaluation shows that thread and memory efficiency affect performance significantly and that our proposed methods outperform previous CPU and GPU algorithms in terms of raw performance and power efficiency. 
The Knuth--Morris--Pratt algorithm is a good choice for GPUs because its regular memory access pattern makes it amenable to several GPU optimizations.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mottin:2016:EQN, author = "Davide Mottin and Matteo Lissandrini and Yannis Velegrakis and Themis Palpanas", title = "Exemplar queries: a new way of searching", journal = j-VLDB-J, volume = "25", number = "6", pages = "741--765", month = dec, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0429-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Nov 10 18:03:04 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Modern search engines employ advanced techniques that go beyond the structures that strictly satisfy the query conditions in an effort to better capture the user intentions. In this work, we introduce a novel query paradigm that considers a user query as an example of the data in which the user is interested. We call these queries exemplar queries. We provide a formal specification of their semantics and show that they are fundamentally different from notions like queries by example, approximate queries and related queries. We provide an implementation of these semantics for knowledge graphs and present an exact solution with a number of optimizations that improve performance without compromising the result quality. We study two different congruence relations, isomorphism and strong simulation, for identifying the answers to an exemplar query. We also provide an approximate solution that prunes the search space and achieves considerably better time performance with minimal or no impact on effectiveness.
The effectiveness and efficiency of these solutions with synthetic and real datasets are experimentally evaluated, and the importance of exemplar queries in practice is illustrated.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2016:EDL, author = "Yuhong Li and Leong Hou U and Man Lung Yiu and Zhiguo Gong", title = "Efficient discovery of longest-lasting correlation in sequence databases", journal = j-VLDB-J, volume = "25", number = "6", pages = "767--790", month = dec, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0432-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Nov 10 18:03:04 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The search for similar subsequences is a core module for various analytical tasks in sequence databases. Typically, the similarity computations require users to set a length. However, there is no robust means by which to define the proper length for different application needs. In this study, we examine a new query that is capable of returning the longest-lasting highly correlated subsequences in a sequence database, which is particularly helpful to analyses without prior knowledge regarding the query length. A baseline, yet expensive, solution is to calculate the correlations for every possible subsequence length. To boost performance, we study a space-constrained index that provides a tight correlation bound for subsequences of similar lengths and offset by intraobject and interobject grouping techniques. To the best of our knowledge, this is the first index to support a normalized distance metric of arbitrary length subsequences.
In addition, we study the use of a smart cache for disk-resident data (e.g., millions of sequence objects) and a graph processing unit-based parallel processing technique for frequently updated data (e.g., nonindexable streaming sequences) to compute the longest-lasting highly correlated subsequences. Extensive experimental evaluation on both real and synthetic sequence datasets verifies the efficiency and effectiveness of our proposed methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Fakas:2016:DPS, author = "Georgios J. Fakas and Zhi Cai and Nikos Mamoulis", title = "Diverse and proportional size-$l$ object summaries using pairwise relevance", journal = j-VLDB-J, volume = "25", number = "6", pages = "791--816", month = dec, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0433-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Nov 10 18:03:04 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The abundance and ubiquity of graphs (e.g., online social networks such as Google+ and Facebook; bibliographic graphs such as DBLP) necessitates the effective and efficient search over them. Given a set of keywords that can identify a data subject (DS), a recently proposed keyword search paradigm produces a set of object summaries (OSs) as results. An OS is a tree structure rooted at the DS node (i.e., a node containing the keywords) with surrounding nodes that summarize all data held on the graph about the DS. OS snippets, denoted as size-l OSs, have also been investigated. A size-l OS is a partial OS containing l nodes such that the summation of their importance scores results in the maximum possible total score.
However, the set of nodes that maximize the total importance score may result in an uninformative size-l OSs, as very important nodes may be repeated in it, dominating other representative information. In view of this limitation, in this paper, we investigate the effective and efficient generation of two novel types of OS snippets, i.e., diverse and proportional size-l OSs, denoted as DSize-l and PSize-l OSs. Namely, besides the importance of each node, we also consider its pairwise relevance (similarity) to the other nodes in the OS and the snippet. We conduct an extensive evaluation on two real graphs (DBLP and Google+). We verify effectiveness by collecting user feedback, e.g., by asking DBLP authors (i.e., the DSs themselves) to evaluate our results. In addition, we verify the efficiency of our algorithms and evaluate the quality of the snippets that they produce.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{BOgh:2016:SPW, author = "Kenneth S. B{\o}gh and Sean Chester and Ira Assent", title = "{SkyAlign}: a portable, work-efficient skyline algorithm for multicore and {GPU} architectures", journal = j-VLDB-J, volume = "25", number = "6", pages = "817--841", month = dec, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0438-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Nov 10 18:03:04 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The skyline operator determines points in a multidimensional dataset that offer some optimal trade-off. State-of-the-art CPU skyline algorithms exploit quad-tree partitioning with complex branching to minimise the number of point-to-point comparisons.
Branch-phobic GPU skyline algorithms rely on compute throughput rather than partitioning, but fail to match the performance of sequential algorithms. In this paper, we introduce a new skyline algorithm, SkyAlign, that is designed for the GPU, and a GPU-friendly, grid-based tree structure upon which the algorithm relies. The search tree allows us to dramatically reduce the amount of work done by the GPU algorithm by avoiding most point-to-point comparisons at the cost of some compute throughput. This trade-off allows SkyAlign to achieve orders of magnitude faster performance than its predecessors. Moreover, a NUMA-oblivious port of SkyAlign outperforms native multicore state of the art on challenging workloads by an increasing margin as more cores and sockets are utilised.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zoumpatianos:2016:AAD, author = "Kostas Zoumpatianos and Stratos Idreos and Themis Palpanas", title = "{ADS}: the adaptive data series index", journal = j-VLDB-J, volume = "25", number = "6", pages = "843--866", month = dec, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0442-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Nov 10 18:03:04 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Numerous applications continuously produce big amounts of data series, and in several time critical scenarios analysts need to be able to query these data as soon as they become available. This, however, is not currently possible with the state-of-the-art indexing methods and for very large data series collections. In this paper, we present the first adaptive indexing mechanism, specifically tailored to solve the problem of indexing and querying very large data series collections. 
We present a detailed design and evaluation of our method using approximate and exact query algorithms with both synthetic and real data sets. Adaptive indexing significantly outperforms previous solutions, gracefully handling large data series collections, reducing the data to query delay: By the time state-of-the-art indexing techniques finish indexing 1 billion data series (and before answering even a single query), our method has already answered $3 \times 10^5$ queries.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Liu:2016:AWW, author = "Qing Liu and Yunjun Gao and Gang Chen and Baihua Zheng and Linlin Zhou", title = "Answering why-not and why questions on reverse top-$k$ queries", journal = j-VLDB-J, volume = "25", number = "6", pages = "867--892", month = dec, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0443-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Nov 10 18:03:04 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Why-not and why questions can be posed by database users to seek clarifications on unexpected query results. Specifically, why-not questions aim to explain why certain expected tuples are absent from the query results, while why questions try to clarify why certain unexpected tuples are present in the query results. This paper systematically explores the why-not and why questions on reverse top-$k$ queries, owing to its importance in multi-criteria decision making. We first formalize why-not questions on reverse top-$k$ queries, which try to include the missing objects in the reverse top-$k$ query results, and then, we propose a unified framework called WQRTQ to answer why-not questions on reverse top-$k$ queries.
Our framework offers three solutions to cater for different application scenarios. Furthermore, we study why questions on reverse top-$k$ queries, which aim to exclude the undesirable objects from the reverse top-$k$ query results, and extend the framework WQRTQ to efficiently answer why questions on reverse top-$k$ queries, which demonstrates the flexibility of our proposed algorithms. Extensive experimental evaluation with both real and synthetic data sets verifies the effectiveness and efficiency of the presented algorithms under various experimental settings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chen:2016:SSL, author = "Yang Chen and Daisy Zhe Wang and Sean Goldberg", title = "{ScaLeKB}: scalable learning and inference over large knowledge bases", journal = j-VLDB-J, volume = "25", number = "6", pages = "893--918", month = dec, year = "2016", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0444-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Nov 10 18:03:04 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Recent years have seen a drastic rise in the construction of web knowledge bases (e.g., Freebase, YAGO, DBPedia). These knowledge bases store structured information about real-world people, places, organizations, etc. However, due to the limitations of human knowledge, web corpora, and information extraction algorithms, the knowledge bases are still far from complete. To infer the missing knowledge, we propose the Ontological Pathfinding (OP) algorithm to mine first-order inference rules from these web knowledge bases. 
The OP algorithm scales up via a series of optimization techniques, including a new parallel-rule-mining algorithm, a pruning strategy to eliminate unsound and inefficient rules before applying them, and a novel partitioning algorithm to break the learning task into smaller independent sub-tasks. Combining these techniques, we develop a first rule mining system that scales to Freebase, the largest public knowledge base with 112 million entities and 388 million facts. We mine 36,625 inference rules in 34 h; no existing system achieves this scale. Based on the mining algorithm and the optimizations, we develop an efficient inference engine. As a result, we infer 0.9 billion new facts from Freebase in 17.19 h. We use cross validation to evaluate the inferred facts and estimate a degree of expansion by 0.6 over Freebase, with a precision approaching 1.0. Our approach outperforms state-of-the-art mining algorithms and inference engines in terms of both performance and quality.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2017:SIB, author = "Chen Li and Volker Markl", title = "Special issue on best papers of {VLDB 2015}", journal = j-VLDB-J, volume = "26", number = "1", pages = "1--2", month = feb, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0450-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Mar 12 10:52:26 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See erratum \cite{Li:2017:ESI}.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2017:ESI, author = "Chen Li and Volker Markl", title = "Erratum to: {Special issue on best papers of VLDB 
2015}", journal = j-VLDB-J, volume = "26", number = "1", pages = "3--3", month = feb, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0458-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Mar 12 10:52:26 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See \cite{Li:2017:SIB}.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gatterbauer:2017:DPA, author = "Wolfgang Gatterbauer and Dan Suciu", title = "Dissociation and propagation for approximate lifted inference with standard relational database management systems", journal = j-VLDB-J, volume = "26", number = "1", pages = "5--30", month = feb, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0434-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Mar 12 10:52:26 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Probabilistic inference over large data sets is a challenging data management problem since exact inference is generally \#P-hard and is most often solved approximately with sampling-based methods today. This paper proposes an alternative approach for approximate evaluation of conjunctive queries with standard relational databases: In our approach, every query is evaluated entirely in the database engine by evaluating a fixed number of query plans, each providing an upper bound on the true probability, then taking their minimum. We provide an algorithm that takes into account important schema information to enumerate only the minimal necessary plans among all possible plans. 
Importantly, this algorithm is a strict generalization of all known PTIME self-join-free conjunctive queries: A query is in PTIME if and only if our algorithm returns one single plan. Furthermore, our approach is a generalization of a family of efficient ranking methods from graphs to hypergraphs. We also adapt three relational query optimization techniques to evaluate all necessary plans very fast. We give a detailed experimental evaluation of our approach and, in the process, provide a new way of thinking about the value of probabilistic methods over non-probabilistic methods for ranking query answers. We also note that the techniques developed in this paper apply immediately to lifted inference from statistical relational models since lifted inference corresponds to PTIME plans in probabilistic databases.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2017:RBR, author = "Jiexing Li and Jeffrey F. Naughton and Rimma V. Nehme", title = "Resource bricolage and resource selection for parallel database systems", journal = j-VLDB-J, volume = "26", number = "1", pages = "31--54", month = feb, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0435-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Mar 12 10:52:26 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Running parallel database systems in an environment with heterogeneous resources has become increasingly common, due to cluster evolution and increasing interest in moving applications into public clouds. Performance differences among machines in the same cluster pose new challenges for parallel database systems. 
First, for database systems running in a heterogeneous cluster, the default uniform data partitioning strategy may overload some of the slow machines, while at the same time it may underutilize the more powerful machines. Since the processing time of a parallel query is determined by the slowest machine, such an allocation strategy may result in a significant query performance degradation. Second, since machines might have varying resources or performance, different choices of machines may lead to different costs or performance for executing the same workload. By carefully selecting the most suitable machines for running a workload, we may achieve better performance with the same budget, or we may meet the same performance requirements with a lower cost. We address these challenges by introducing techniques we call resource bricolage and resource selection that improve database performance in heterogeneous environments. Our approaches quantify the performance differences among machines with various resources as they process workloads with diverse resource requirements. For the purpose of better resource utilization, we formalize the problem of minimizing workload execution time and view it as an optimization problem, and then, we employ linear programming to obtain a recommended data partitioning scheme. For the purpose of better resource selection, we formalize two problems: One minimizes the total workload execution time with a given budget, and the other minimizes the total budget with a given performance target. We then employ different mixed-integer programs to search for the optimal resource selection decisions. 
We verify the effectiveness of both resource bricolage and resource selection techniques with an extensive experimental study.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Finis:2017:OIS, author = "Jan Finis and Robert Brunel and Alfons Kemper and Thomas Neumann and Norman May and Franz Faerber", title = "{Order Indexes}: supporting highly dynamic hierarchical data in relational main-memory database systems", journal = j-VLDB-J, volume = "26", number = "1", pages = "55--80", month = feb, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0436-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Mar 12 10:52:26 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Maintaining and querying hierarchical data in a relational database system is an important task in many business applications. This task is especially challenging when considering dynamic use cases with a high rate of complex, possibly skewed structural updates. Labeling schemes are widely considered the indexing technique of choice for hierarchical data, and many different schemes have been proposed. However, they cannot handle dynamic use cases well due to various problems, which we investigate in this paper. 
We therefore propose Order Indexes--a dynamic representation of the nested intervals encoding--which offer competitive query performance, unprecedented update efficiency, and robustness for highly dynamic workloads.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sa:2017:IKB, author = "Christopher Sa and Alex Ratner and Christopher R{\'e} and Jaeho Shin and Feiran Wang and Sen Wu and Ce Zhang", title = "Incremental knowledge base construction using {DeepDive}", journal = j-VLDB-J, volume = "26", number = "1", pages = "81--105", month = feb, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0437-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Mar 12 10:52:26 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Populating a database with information from unstructured sources--also known as knowledge base construction (KBC)--is a long-standing problem in industry and research that encompasses problems of extraction, cleaning, and integration. In this work, we describe DeepDive, a system that combines database and machine learning ideas to help develop KBC systems, and we present techniques to make the KBC process more efficient. We observe that the KBC process is iterative, and we develop techniques to incrementally produce inference results for KBC systems. We propose two methods for incremental inference, based, respectively, on sampling and variational techniques. We also study the trade-off space of these methods and develop a simple rule-based optimizer. 
DeepDive includes all of these contributions, and we evaluate DeepDive on five KBC systems, showing that it can speed up KBC inference tasks by up to two orders of magnitude with negligible impact on quality.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Trummer:2017:MOP, author = "Immanuel Trummer and Christoph Koch", title = "Multi-objective parametric query optimization", journal = j-VLDB-J, volume = "26", number = "1", pages = "107--124", month = feb, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0439-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Mar 12 10:52:26 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Classical query optimization compares query plans according to one cost metric and associates each plan with a constant cost value. In this paper, we introduce the multi-objective parametric query optimization (MPQO) problem where query plans are compared according to multiple cost metrics and the cost of a given plan according to a given metric is modeled as a function that depends on multiple parameters. The cost metrics may, for instance, include execution time or monetary fees; a parameter may represent the selectivity of a query predicate that is unspecified at optimization time. MPQO generalizes parametric query optimization (which allows multiple parameters but only one cost metric) and multi-objective query optimization (which allows multiple cost metrics but no parameters). We formally analyze the novel MPQO problem and show why existing algorithms are inapplicable. We present a generic algorithm for MPQO and a specialized version for MPQO with piecewise-linear plan cost functions. 
We prove that both algorithms find all relevant query plans and experimentally evaluate the performance of our second algorithm in multiple scenarios.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Khayyat:2017:FSI, author = "Zuhair Khayyat and William Lucia and Meghna Singh and Mourad Ouzzani and Paolo Papotti and Jorge-Arnulfo Quian{\'e}-Ruiz and Nan Tang and Panos Kalnis", title = "Fast and scalable inequality joins", journal = j-VLDB-J, volume = "26", number = "1", pages = "125--150", month = feb, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0441-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Mar 12 10:52:26 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Inequality joins, which is to join relations with inequality conditions, are used in various applications. Optimizing joins has been the subject of intensive research ranging from efficient join algorithms such as sort-merge join, to the use of efficient indices such as B$^+$-tree, R$^*$-tree and Bitmap. However, inequality joins have received little attention and queries containing such joins are notably very slow. In this paper, we introduce fast inequality join algorithms based on sorted arrays and space-efficient bit-arrays. We further introduce a simple method to estimate the selectivity of inequality joins which is then used to optimize multiple predicate queries and multi-way joins. Moreover, we study an incremental inequality join algorithm to handle scenarios where data keeps changing. We have implemented a centralized version of these algorithms on top of PostgreSQL, a distributed version on top of Spark SQL, and an existing data cleaning system, Nadeef. 
By comparing our algorithms against well-known optimization techniques for inequality joins, we show our solution is more scalable and several orders of magnitude faster.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yang:2017:RKN, author = "Shiyu Yang and Muhammad Aamir Cheema and Xuemin Lin and Ying Zhang and Wenjie Zhang", title = "Reverse $k$ nearest neighbors queries and spatial reverse top-$k$ queries", journal = j-VLDB-J, volume = "26", number = "2", pages = "151--176", month = apr, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0445-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Mar 27 20:55:44 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Given a set of facilities and a set of users, a reverse k nearest neighbors (RkNN) query q returns every user for which the query facility is one of the k closest facilities. Almost all of the existing techniques to answer RkNN queries adopt a pruning-and-verification framework. Regions-based pruning and half-space pruning are the two most notable pruning strategies. The half-space-based approach prunes a larger area and is generally believed to be superior. Influenced by this perception, almost all existing RkNN algorithms utilize and improve the half-space pruning strategy. We observe the weaknesses and strengths of both strategies and discover that the regions-based pruning has certain strengths that have not been exploited in the past. Motivated by this, we present a new regions-based pruning algorithm called Slice that utilizes the strength of regions-based pruning and overcomes its limitations. 
We also study spatial reverse top-$k$ (SRTk) queries that return every user u for which the query facility is one of the top-$k$ facilities according to a given linear scoring function. We first extend half-space-based pruning to answer SRTk queries. Then, we propose a novel regions-based pruning algorithm following Slice framework to solve the problem. Our extensive experimental study on synthetic and real data sets demonstrates that Slice is significantly more efficient than all existing RkNN and SRTk algorithms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2017:DBQ, author = "Kun Li and Xiaofeng Zhou and Daisy Zhe Wang and Christan Grant and Alin Dobra and Christopher Dudley", title = "In-database batch and query-time inference over probabilistic graphical models using {UDA} --- {GIST}", journal = j-VLDB-J, volume = "26", number = "2", pages = "177--201", month = apr, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0446-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Mar 27 20:55:44 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "To meet customers' pressing demands, enterprise database vendors have been pushing advanced analytical techniques into databases. Most major DBMSes use user-defined aggregates (UDAs), a data-driven operator, to implement analytical techniques in parallel. However, UDAs alone are not sufficient to implement statistical algorithms where most of the work is performed by iterative transitions over a large state that cannot be naively partitioned due to data dependency. Typically, this type of statistical algorithm requires pre-processing to set up the large state in the first place and demands post-processing after the statistical inference. 
This paper presents general iterative state transition (GIST), a new database operator for parallel iterative state transitions over large states. GIST receives a state constructed by a UDA and then performs rounds of transitions on the state until it converges. A final UDA performs post-processing and result extraction. We argue that the combination of UDA and GIST (UDA---GIST) unifies data-parallel and state-parallel processing in a single system, thus significantly extending the analytical capabilities of DBMSes. We exemplify the framework through two high-profile batch applications: cross-document coreference, image denoising and one query-time inference application: marginal inference queries over probabilistic knowledge graphs. The 3 applications use probabilistic graphical models, which encode complex relationships of different variables and are powerful for a wide range of problems. We show that the in-database framework allows us to tackle a 27 times larger problem than a scalable distributed solution for the first application and achieves 43 times speedup over the state-of-the-art for the second application. For the third application, we implement query-time inference using the UDA---GIST framework and apply over a probabilistic knowledge graph, achieving 10 times speedup over sequential inference. To the best of our knowledge, this is the first in-database query-time inference engine over large probabilistic knowledge base. We show that the UDA---GIST framework for data- and graph-parallel computations can support both batch and query-time inference efficiently in databases.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xie:2017:PTP, author = "Miao Xie and Sourav S. 
Bhowmick and Gao Cong and Qing Wang", title = "{PANDA}: toward partial topology-based search on large networks in a single machine", journal = j-VLDB-J, volume = "26", number = "2", pages = "203--228", month = apr, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0447-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Mar 27 20:55:44 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A large body of research has focused on efficient and scalable processing of subgraph search queries on large networks. In these efforts, a query is posed in the form of a connected query graph. Unfortunately, in practice end users may not always have precise knowledge about the topological relationships between nodes in a query graph to formulate a connected query. In this paper, we present a novel graph querying paradigm called partial topology-based network search and propose a query processing framework called PANDA to efficiently process partial topology query (PTQ) in a single machine. A PTQ is a disconnected query graph containing multiple connected query components. PTQs allow an end user to formulate queries without demanding precise information about the complete topology of a query graph. To this end, we propose an exact and an approximate algorithm called SEN-PANDA and PO-PANDA, respectively, to generate top-$k$ matches of a PTQ. We also present a subgraph simulation-based optimization technique to further speedup the processing of PTQs. 
Using real-life networks with millions of nodes, we experimentally verify that our proposed algorithms are superior to several baseline techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yang:2017:SPM, author = "Mohan Yang and Alexander Shkapsky and Carlo Zaniolo", title = "Scaling up the performance of more powerful {Datalog} systems on multicore machines", journal = j-VLDB-J, volume = "26", number = "2", pages = "229--248", month = apr, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0448-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Mar 27 20:55:44 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Extending RDBMS technology to achieve performance and scalability for queries that are much more powerful than those of SQL-2 has been the goal of deductive database research for more than thirty years. The DeALS system has made major progress toward this goal, by (1) Datalog extensions that support the more powerful recursive queries needed in advanced applications, and (2) superior performance for both traditional recursive queries and those made possible by the new extensions, while (3) delivering competitive performance with commercial RDBMSs on non-recursive queries. In this paper, we focus on the techniques used to support the in-memory evaluation of Datalog programs on multicore machines. In DeALS, a Datalog program is represented as an AND/OR tree, and multiple copies of the same AND/OR tree are used to access the tables in the database concurrently during the parallel evaluation. 
We describe compilation techniques that (1) recognize when the given program is lock-free, (2) transform a locking program into a lock-free program, and (3) find an efficient parallel plan that correctly evaluates the program while minimizing the use of locks and other overhead required for parallel evaluation. Extensive experiments demonstrate the effectiveness of the proposed techniques.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yu:2017:UFS, author = "Minghe Yu and Jin Wang and Guoliang Li and Yong Zhang and Dong Deng and Jianhua Feng", title = "A unified framework for string similarity search with edit-distance constraint", journal = j-VLDB-J, volume = "26", number = "2", pages = "249--274", month = apr, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0449-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Mar 27 20:55:44 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "String similarity search is a fundamental operation in data cleaning and integration. It has two variants: threshold-based string similarity search and top-$k$ string similarity search. Existing algorithms are efficient for either the former or the latter; most of them cannot support both two variants. To address this limitation, we propose a unified framework. We first recursively partition strings into disjoint segments and build a hierarchical segment tree index (HS-Tree) on top of the segments. Then, we utilize the HS-Tree to support similarity search. For threshold-based search, we identify appropriate tree nodes based on the threshold to answer the query and devise an efficient algorithm (HS-Search). 
For top-$k$ search, we identify promising strings with large possibility to be similar to the query, utilize these strings to estimate an upper bound which is used to prune dissimilar strings and propose an algorithm (HS-Topk). We develop effective pruning techniques to further improve the performance. To support large data sets, we extend our techniques to support the disk-based setting. Experimental results on real-world data sets show that our method achieves high performance on the two problems and outperforms state-of-the-art algorithms by 5--10 times.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yuan:2017:EEG, author = "Long Yuan and Lu Qin and Xuemin Lin and Lijun Chang and Wenjie Zhang", title = "{I/O} efficient {ECC} graph decomposition via graph reduction", journal = j-VLDB-J, volume = "26", number = "2", pages = "275--300", month = apr, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0451-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Mar 27 20:55:44 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The problem of computing $k$-edge connected components ($k$-ECCs) of a graph G for a specific $k$ is a fundamental graph problem and has been investigated recently. In this paper, we study the problem of ECC decomposition, which computes the $k$-ECCs of a graph G for all possible k values. ECC decomposition can be widely applied in a variety of applications such as graph-topology analysis, community detection, Steiner Component Search, and graph visualization. 
A straightforward solution for ECC decomposition is to apply the existing $k$-ECC computation algorithm to compute the $k$-ECCs for all $k$ values. However, this solution is not applicable to large graphs for two challenging reasons. First, all existing $k$-ECC computation algorithms are highly memory intensive due to the complex data structures used in the algorithms. Second, the number of possible $k$ values can be very large, resulting in a high computational cost when each $k$ value is independently considered. In this paper, we address the above challenges, and study I/O efficient ECC decomposition via graph reduction. We introduce two elegant graph reduction operators which aim to reduce the size of the graph loaded in memory while preserving the connectivity information of a certain set of edges to be computed for a specific k. We also propose three novel I/O efficient algorithms, Bottom-Up, Top-Down, and Hybrid, that explore the $k$ values in different orders to reduce the redundant computations between different $k$ values. We analyze the I/O and memory costs for all proposed algorithms. In addition, we extend our algorithm to build an efficient index for Steiner Component Search. We show that our index can be used to perform Steiner Component Search in optimal I/Os when only the node information of the graph is allowed to be loaded in memory. In our experiments, we evaluate our algorithms using seven real large datasets with various graph properties, one of which contains 1.95 billion edges. 
The experimental results show that our proposed algorithms are scalable and efficient.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2017:TKS, author = "Xiang Wang and Wenjie Zhang and Ying Zhang and Xuemin Lin and Zengfeng Huang", title = "Top-$k$ spatial-keyword publish\slash subscribe over sliding window", journal = j-VLDB-J, volume = "26", number = "3", pages = "301--326", month = jun, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0453-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jun 24 11:54:27 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the prevalence of social media and GPS-enabled devices, a massive amount of geo-textual data have been generated in a stream fashion, leading to a variety of applications such as location-based recommendation and information dissemination. In this paper, we investigate a novel real-time top-$k$ monitoring problem over sliding window of streaming data; that is, we continuously maintain the top-$k$ most relevant geo-textual messages (e.g., geo-tagged tweets) for a large number of spatial-keyword subscriptions (e.g., registered users interested in local events) simultaneously. To provide the most recent information under controllable memory cost, sliding window model is employed on the streaming geo-textual data. To the best of our knowledge, this is the first work to study top-$k$ spatial-keyword publish/subscribe over sliding window. A novel centralized system, called Skype (Top-$k$ Spatial-keyword Publish/Subscribe), is proposed in this paper. 
In Skype, to continuously maintain top-$k$ results for massive subscriptions, we devise a novel indexing structure upon subscriptions such that each incoming message can be immediately delivered on its arrival. To reduce the expensive top-$k$ re-evaluation cost triggered by message expiration, we develop a novel cost-based $k$-skyband technique to reduce the number of re-evaluations in a cost-effective way. Extensive experiments verify the great efficiency and effectiveness of our proposed techniques. Furthermore, to support better scalability and higher throughput, we propose a distributed version of Skype, namely DSkype, on top of Storm, which is a popular distributed stream processing system. With the help of fine-tuned subscription/message distribution mechanisms, DSkype can achieve orders of magnitude speed-up than its centralized version.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gao:2017:PBH, author = "Jun Gao and Yuqiong Liu and Chang Zhou and Jeffrey Xu Yu", title = "Path-based holistic detection plan for multiple patterns in distributed graph frameworks", journal = j-VLDB-J, volume = "26", number = "3", pages = "327--345", month = jun, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-016-0452-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jun 24 11:54:27 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Multiple pattern detection is needed in applications like disease analysis over gene networks, bug detection in program flow networks. This paper takes pattern detection to investigate the evaluation and optimization of multiple jobs in existing distributed graph processing frameworks. 
The evaluation plan for multiple pattern detection should be parallelizable and can capture and reuse the shared parts among pattern queries easily. In this paper, we design a path-based holistic plan for multiple pattern queries. Specifically, (1) we design a path-based edge-covered plan for an individual pattern. The paths in the plan can be easily captured and reused among different queries. Additionally, the evaluation plan is fully parallelizable, in which each data vertex performs necessary join operations independently during exploring graph. (2) We extend the individual plan to a holistic evaluation plan for multiple queries, whose results are equivalent to those of individual queries. The plan reduces the overall cost by finding frequent paths among queries and reusing the shared part in the holistic plan. (3) We devise various optimization strategies over the holistic plan. The experimental studies, conducted on Giraph, illustrate the high effectiveness of our holistic approaches.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yi:2017:AVQ, author = "Peipei Yi and Byron Choi and Sourav S. Bhowmick and Jianliang Xu", title = "{AutoG}: a visual query autocompletion framework for graph databases", journal = j-VLDB-J, volume = "26", number = "3", pages = "347--372", month = jun, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0454-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jun 24 11:54:27 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Composing queries is evidently a tedious task. 
This is particularly true of graph queries as they are typically complex and prone to errors, compounded by the fact that graph schemas can be missing or too loose to be helpful for query formulation. Despite the great success of query formulation aids, in particular, automatic query completion, graph query autocompletion has received much less research attention. In this paper, we propose a novel framework for subgraph query autocompletion (called AutoG). Given an initial query q and a user's preference as input, AutoG returns ranked query suggestions $ Q' $ as output. Users may choose a query from $ Q' $ and iteratively apply AutoG to compose their queries. The novelties of AutoG are as follows: First, we formalize query composition. Second, we propose to increment a query with the logical units called c-prime features that are (i) frequent subgraphs and (ii) constructed from smaller c-prime features in no more than c ways. Third, we propose algorithms to rank candidate suggestions. Fourth, we propose a novel index called feature Dag (FDag) to optimize the ranking. We study the query suggestion quality with simulations and real users and conduct an extensive performance evaluation. The results show that the query suggestions are useful (saved roughly 40\% of users' mouse clicks), and AutoG returns suggestions shortly under a large variety of parameter settings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Aljubayrin:2017:FLC, author = "Saad Aljubayrin and Jianzhong Qi and Christian S.
Jensen and Rui Zhang and Zhen He and Yuan Li", title = "Finding lowest-cost paths in settings with safe and preferred zones", journal = j-VLDB-J, volume = "26", number = "3", pages = "373--397", month = jun, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0455-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jun 24 11:54:27 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We define and study Euclidean and spatial network variants of a new path finding problem: given a set of safe or preferred zones with zero or low cost, find paths that minimize the cost of travel from an origin to a destination. In this problem, the entire space is passable, with preference given to safe or preferred zones. Existing algorithms for problems that involve unsafe regions to be avoided strictly are not effective for this new problem. To solve the Euclidean variant, we devise a transformation of the continuous data space with safe zones into a discrete graph upon which shortest path algorithms apply. A naive transformation yields a large graph that is expensive to search. In contrast, our transformation exploits properties of hyperbolas in Euclidean space to safely eliminate graph edges, thus improving performance without affecting correctness. To solve the spatial network variant, we propose a different graph-to-graph transformation that identifies critical points that serve the same purpose as do the hyperbolas, thus also avoiding the extraneous edges. Having solved the problem for safe zones with zero costs, we extend the transformations to the weighted version of the problem, where travel in preferred zones has nonzero costs. 
Experiments on both real and synthetic data show that our approaches outperform baseline approaches by more than an order of magnitude in graph construction time, storage space, and query response time.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2017:DSP, author = "Dongxiang Zhang and Dingyu Yang and Yuan Wang and Kian-Lee Tan and Jian Cao and Heng Tao Shen", title = "Distributed shortest path query processing on dynamic road networks", journal = j-VLDB-J, volume = "26", number = "3", pages = "399--419", month = jun, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0457-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jun 24 11:54:27 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Shortest path query processing on dynamic road networks is a fundamental component for real-time navigation systems. In the face of an enormous volume of customer demand from Uber and similar apps, it is desirable to study distributed shortest path query processing that can be deployed on elastic and fault-tolerant cloud platforms. In this paper, we combine the merits of distributed streaming computing systems and lightweight indexing to build an efficient shortest path query processing engine on top of Yahoo S4. We propose two types of asynchronous communication algorithms for early termination. One is first-in-first-out message propagation with certain optimizations, and the other is prioritized message propagation with the help of navigational intelligence. Extensive experiments were conducted on large-scale real road networks, and the results show that the query efficiency of our methods can meet the real-time requirement and is superior to Pregel and Pregel+. 
The source code of our system is publicly available at https://github.com/yangdingyu/cands.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lai:2017:SSE, author = "Longbin Lai and Lu Qin and Xuemin Lin and Lijun Chang", title = "Scalable subgraph enumeration in {MapReduce}: a cost-oriented approach", journal = j-VLDB-J, volume = "26", number = "3", pages = "421--446", month = jun, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0459-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jun 24 11:54:27 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Subgraph enumeration, which aims to find all the subgraphs of a large data graph that are isomorphic to a given pattern graph, is a fundamental graph problem with a wide range of applications. However, existing sequential algorithms for subgraph enumeration fall short in handling large graphs due to the involvement of computationally intensive subgraph isomorphism operations. Thus, some recent researches focus on solving the problem using MapReduce. Nevertheless, exiting MapReduce approaches are not scalable to handle very large graphs since they either produce a huge number of partial results or consume a large amount of memory. Motivated by this, in this paper, we propose a new algorithm $ \mathsf {TwinTwigJoin} $ based on a left-deep-join framework in MapReduce, in which the basic join unit is a $ \mathsf {TwinTwig} $ (an edge or two incident edges of a node).
We show that in the Erd{\H{o}}s--R{\'e}nyi random graph model, $ \mathsf {TwinTwigJoin} $ is instance optimal in the left-deep-join framework under reasonable assumptions, and we devise an algorithm to compute the optimal join plan. We further discuss how our approach can be adapted to handle the power-law random graph model. Three optimization strategies are explored to improve our algorithm. Ultimately, by aggregating equivalent nodes into a compressed node, we construct the compressed graph, upon which the subgraph enumeration is further improved. We conduct extensive performance studies in several real graphs, one of which contains billions of edges. Our approach significantly outperforms existing solutions in all tests.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Cafagna:2017:DIP, author = "Francesco Cafagna and Michael H. B{\"o}hlen", title = "Disjoint interval partitioning", journal = j-VLDB-J, volume = "26", number = "3", pages = "447--466", month = jun, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0456-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jun 24 11:54:27 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In databases with time interval attributes, query processing techniques that are based on sort-merge or sort-aggregate deteriorate. This happens because for intervals no total order exists and either the start or end point is used for the sorting. Doing so leads to inefficient solutions with lots of unproductive comparisons that do not produce an output tuple. Even if just one tuple with a long interval is present in the data, the number of unproductive comparisons of sort-merge and sort-aggregate gets quadratic.
In this paper we propose disjoint interval partitioning ($ \mathcal {DIP} $), a technique to efficiently perform sort-based operators on interval data. $ \mathcal {DIP} $ divides an input relation into the minimum number of partitions, such that all tuples in a partition are non-overlapping. The absence of overlapping tuples guarantees efficient sort-merge computations without backtracking. With $ \mathcal {DIP} $ the number of unproductive comparisons is linear in the number of partitions. In contrast to current solutions with inefficient random accesses to the active tuples, $ \mathcal {DIP} $ fetches the tuples in a partition sequentially. We illustrate the generality and efficiency of $ \mathcal {DIP} $ by describing and evaluating three basic database operators over interval data: join, anti-join and aggregation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gao:2017:EFR, author = "Yunjun Gao and Xiaoye Miao and Gang Chen and Baihua Zheng and Deng Cai and Huiyong Cui", title = "On efficiently finding reverse $k$-nearest neighbors over uncertain graphs", journal = j-VLDB-J, volume = "26", number = "4", pages = "467--492", month = aug, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0460-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 27 16:38:23 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Reverse $k$-nearest neighbor (R$k$NN) query on graphs returns the data objects that take a specified query object q as one of their $k$-nearest neighbors. It has significant influence in many real-life applications including resource allocation and profile-based marketing.
However, to the best of our knowledge, there is little previous work on R$k$NN search over uncertain graph data, even though many complex networks such as traffic networks and protein--protein interaction networks are often modeled as uncertain graphs. In this paper, we systematically study the problem of reverse $k$-nearest neighbor search on uncertain graphs (UG-R$k$NN search for short), where graph edges contain uncertainty. First, to address UG-R$k$NN search, we propose three effective heuristics, i.e., GSP, EGR, and PBP, which minimize the original large uncertain graph as a much smaller essential uncertain graph, cut down the number of possible graphs via the newly introduced graph conditional dominance relationship, and reduce the validation cost of data nodes in order to improve query efficiency. Then, we present an efficient algorithm, termed as SDP, to support UG-R$k$NN retrieval by seamlessly integrating the three heuristics together. In view of the high complexity of UG-R$k$NN search, we further present a novel algorithm called TripS, with the help of an adaptive stratified sampling technique.
Extensive experiments using both real and synthetic graphs demonstrate the performance of our proposed algorithms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tao:2017:SSW, author = "Yufei Tao and Xiaocheng Hu and Miao Qiao", title = "Stream sampling over windows with worst-case optimality and $ \ell $-overlap independence", journal = j-VLDB-J, volume = "26", number = "4", pages = "493--510", month = aug, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0461-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 27 16:38:23 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Sampling provides fundamental support to numerous applications that cannot afford to materialize all the objects arriving at a rapid speed. Existing stream sampling algorithms guarantee small space and query overhead, but all require worst-case update time proportional to the number of samples. This creates a performance issue when a large sample set is required. In this paper, we propose a new sampling algorithm that is optimal simultaneously in all the three aspects: space, query time, and update time. In particular, the algorithm handles an update in $ O(1) $ worst-case time with a very small hidden constant. 
Our algorithm also ensures a strong independence guarantee: the sample sets of all the queries are mutually independent as long as the overlap between two query windows is small.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Nguyen:2017:ADC, author = "Quoc Viet Nguyen and Chi Thang Duong and Thanh Tam Nguyen and Matthias Weidlich and Karl Aberer and Hongzhi Yin and Xiaofang Zhou", title = "Argument discovery via crowdsourcing", journal = j-VLDB-J, volume = "26", number = "4", pages = "511--535", month = aug, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0462-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 27 16:38:23 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The amount of controversial issues being discussed on the Web has been growing dramatically. In articles, blogs, and wikis, people express their points of view in the form of arguments, i.e., claims that are supported by evidence. Discovery of arguments has a large potential for informing decision-making. However, argument discovery is hindered by the sheer amount of available Web data and its unstructured, free-text representation. The former calls for automatic text-mining approaches, whereas the latter implies a need for manual processing to extract the structure of arguments. In this paper, we propose a crowdsourcing-based approach to build a corpus of arguments, an argumentation base, thereby mediating the trade-off of automatic text-mining and manual processing in argument discovery. 
We develop an end-to-end process that minimizes the crowd cost while maximizing the quality of crowd answers by: (1) ranking argumentative texts, (2) pro-actively eliciting user input to extract arguments from these texts, and (3) aggregating heterogeneous crowd answers. Our experiments with real-world datasets highlight that our method discovers virtually all arguments in documents when processing only 25\% of the text with more than 80\% precision, using only 50\% of the budget consumed by a baseline algorithm.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2017:EMA, author = "Tianzheng Wang and Ryan Johnson and Alan Fekete and Ippokratis Pandis", title = "Efficiently making (almost) any concurrency control mechanism serializable", journal = j-VLDB-J, volume = "26", number = "4", pages = "537--562", month = aug, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0463-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 27 16:38:23 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See erratum \cite{Wang:2018:EEM}.", abstract = "Concurrency control (CC) algorithms must trade off strictness for performance. In particular, serializable CC schemes generally pay higher cost to prevent anomalies, both in runtime overhead such as the maintenance of lock tables and in efforts wasted by aborting transactions. We propose the serial safety net (SSN), a serializability-enforcing certifier which can be applied on top of various CC schemes that offer higher performance but admit anomalies, such as snapshot isolation and read committed. The underlying CC mechanism retains control of scheduling and transactional accesses, while SSN tracks the resulting dependencies. 
At commit time, SSN performs a validation test by examining only direct dependencies of the committing transaction to determine whether it can commit safely or must abort to avoid a potential dependency cycle. SSN performs robustly for a variety of workloads. It maintains the characteristics of the underlying CC without biasing toward a certain type of transactions, though the underlying CC scheme might. Besides traditional OLTP workloads, SSN also efficiently handles heterogeneous workloads which include a significant portion of long, read-mostly transactions. SSN can avoid tracking the vast majority of reads (thus reducing the overhead of serializability certification) and still produce serializable executions with little overhead. The dependency tracking and validation tests can be done efficiently, fully parallel and latch-free, for multi-version systems on modern hardware with substantial core count and large main memory. We demonstrate the efficiency, accuracy and robustness of SSN using extensive simulations and an implementation that overlays snapshot isolation in ERMIA, a memory-optimized OLTP engine that supports multiple CC schemes. 
Evaluation results confirm that SSN is a promising approach to serializability with robust performance and low overhead for various workloads.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhu:2017:EAT, author = "Qiankun Zhu and Hong Cheng and Xin Huang", title = "{I/O}-efficient algorithms for top-$k$ nearest keyword search in massive graphs", journal = j-VLDB-J, volume = "26", number = "4", pages = "563--583", month = aug, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0464-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 27 16:38:23 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Networks emerging nowadays usually have labels or textual content on the nodes. We model such commonly seen network as an undirected graph G, in which each node is attached with zero or more keywords, and each edge is assigned with a length. On such networks, a novel and useful query is called top-$k$ nearest keyword ($k$-NK) search. Given a query node q in G and a keyword $ \lambda $, a $k$-NK query searches k nodes which contain $ \lambda $ and are nearest to q. The $k$-NK problem has been studied recently in the literature. But most existing solutions assume that the graph as well as the constructed index can fit entirely in memory. As a result, they cannot be applied directly to very large-scale networks which are commonly found in practice, but cannot fit in memory. In this work, we design an I/O-efficient solution, which uses a compact disk index to answer a $k$-NK query with constant I/Os. The key to an accurate $k$-NK result is a precise shortest distance estimation in a graph.
In our solution, we follow our previous work Qiao et al. (PVLDB 6:901--912, 2013) which uses the shortest path tree as an approximate representation of a graph and uses the tree distance between two nodes as an accurate estimation of the shortest distance between them on a graph. With such representation, the original $k$-NK query on a graph can be reduced to answering the query on a set of trees and then assembling the results obtained from the trees. We exploit a compact tree-based index and study how to lay out the index to disk. We design a novel technique which decomposes the index tree into paths and subtrees and stores them in disk. Our theoretical analysis shows that the disk-based index is small in size and supports constant query I/Os. Extensive experimental study on massive trees and graphs with billions of edges and keywords verifies our theoretical findings and demonstrates the superiority of our method over the state-of-the-art methods in the literature.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chen:2017:IMU, author = "Lu Chen and Yunjun Gao and Aoxiao Zhong and Christian S.
Jensen and Gang Chen and Baihua Zheng", title = "Indexing metric uncertain data for range queries and range joins", journal = j-VLDB-J, volume = "26", number = "4", pages = "585--610", month = aug, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0465-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jul 27 16:38:23 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Range queries and range joins in metric spaces have applications in many areas, including GIS, computational biology, and data integration, where metric uncertain data exist in different forms, resulting from circumstances such as equipment limitations, high-throughput sequencing technologies, and privacy preservation. We represent metric uncertain data by using an object-level model and a bi-level model, respectively. Two novel indexes, the uncertain pivot B$^+$-tree (UPB-tree) and the uncertain pivot B$^+$-forest (UPB-forest), are proposed in order to support probabilistic range queries and range joins for a wide range of uncertain data types and similarity metrics. Both index structures use a small set of effective pivots chosen based on a newly defined criterion and employ the B$^+$-tree(s) as the underlying index. In addition, we present efficient metric probabilistic range query and metric probabilistic range join algorithms, which utilize validation and pruning techniques based on derived probability lower and upper bounds.
Extensive experiments with both real and synthetic data sets demonstrate that, compared against existing state-of-the-art indexes for metric uncertain data, the UPB-tree and the UPB-forest incur much lower construction costs, consume less storage space, and can support more efficient metric probabilistic range queries and metric probabilistic range joins.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Song:2017:GRU, author = "Shaoxu Song and Boge Liu and Hong Cheng and Jeffrey Xu Yu and Lei Chen", title = "Graph repairing under neighborhood constraints", journal = j-VLDB-J, volume = "26", number = "5", pages = "611--635", month = oct, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0466-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 2 16:14:05 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "A broad class of data, ranging from similarity networks, workflow networks to protein networks, can be modeled as graphs with data values as vertex labels. Both vertex labels and neighbors could be dirty for various reasons such as typos or erroneous reporting of results in scientific experiments. Neighborhood constraints, specifying label pairs that are allowed to appear on adjacent vertices in the graph, are employed to detect and repair erroneous vertex labels and neighbors. In this paper, we study the problem of repairing vertex labels and neighbors to make graphs satisfy neighborhood constraints. Unfortunately, the problem is generally hard, which motivates us to devise approximation methods for repairing and identify interesting special cases (star and clique constraints) that can be efficiently solved. 
First, we propose several label repairing approximation algorithms including greedy heuristics, contraction method and an approach combining both. The performances of algorithms are also analyzed for the special case. Moreover, we devise a cubic-time constant-factor graph repairing algorithm with both label and neighbor repairs (given degree-bounded instance graphs). Our extensive experimental evaluation on real data demonstrates the effectiveness of eliminating frauds in several types of application networks.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhou:2017:EOV, author = "Xiangmin Zhou and Lei Chen and Yanchun Zhang and Dong Qin and Longbing Cao and Guangyan Huang and Chen Wang", title = "Enhancing online video recommendation using social user interactions", journal = j-VLDB-J, volume = "26", number = "5", pages = "637--656", month = oct, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0469-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 2 16:14:05 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The creation of media sharing communities has resulted in the astonishing increase of digital videos, and their wide applications in the domains like online news broadcasting, entertainment and advertisement. The improvement of these applications relies on effective solutions for social user access to videos. This fact has driven the research interest in the recommendation in shared communities. Though effort has been put into social video recommendation, the contextual information on social users has not been well exploited for effective recommendation. 
Motivated by this, in this paper, we propose a novel approach based on the video content and user information for the recommendation in shared communities. A new solution is developed by allowing batch video recommendation to multiple new users and optimizing the subcommunity extraction. We first propose an effective technique that reduces the subgraph partition cost based on graph decomposition and reconstruction for efficient subcommunity extraction. Then, we design a summarization-based algorithm which groups the clicked videos of multiple unregistered users and simultaneously provide recommendation to each of them. Finally, we present a nontrivial social updates maintenance approach for social data based on user connection summarization. We evaluate the performance of our solution over a large dataset considering different strategies for group video recommendation in sharing communities.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Attasena:2017:SSC, author = "Varunya Attasena and J{\'e}r{\^o}me Darmont and Nouria Harbi", title = "Secret sharing for cloud data security: a survey", journal = j-VLDB-J, volume = "26", number = "5", pages = "657--681", month = oct, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0470-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 2 16:14:05 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Cloud computing helps reduce costs, increase business agility and deploy solutions with a high return on investment for many types of applications. However, data security is of premium importance to many users and often restrains their adoption of cloud technologies. 
Various approaches, i.e., data encryption, anonymization, replication and verification, help enforce different facets of data security. Secret sharing is a particularly interesting cryptographic technique. Its most advanced variants indeed simultaneously enforce data privacy, availability and integrity, while allowing computation on encrypted data. The aim of this paper is thus to wholly survey secret sharing schemes with respect to data security, data access and costs in the pay-as-you-go paradigm.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Huang:2017:QAL, author = "Qiang Huang and Jianlin Feng and Qiong Fang and Wilfred Ng and Wei Wang", title = "Query-aware locality-sensitive hashing scheme for $ l_p $ norm", journal = j-VLDB-J, volume = "26", number = "5", pages = "683--708", month = oct, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0472-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 2 16:14:05 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The problem of c-Approximate Nearest Neighbor (c-ANN) search in high-dimensional space is fundamentally important in many applications, such as image database and data mining. Locality-Sensitive Hashing (LSH) and its variants are the well-known indexing schemes to tackle the c-ANN search problem. Traditionally, LSH functions are constructed in a query-oblivious manner, in the sense that buckets are partitioned before any query arrives. However, objects closer to a query may be partitioned into different buckets, which is undesirable. 
Due to the use of query-oblivious bucket partition, the state-of-the-art LSH schemes for external memory, namely C2LSH and LSB-Forest, only work with approximation ratio of integer $ c \ge 2 $. In this paper, we introduce a novel concept of query-aware bucket partition which uses a given query as the ``anchor'' for bucket partition. Accordingly, a query-aware LSH function under a specific $ l_p $ norm with $ p \in (0, 2] $ is a random projection coupled with query-aware bucket partition, which removes random shift required by traditional query-oblivious LSH functions. The query-aware bucket partitioning strategy can be easily implemented so that query performance is guaranteed. For each $ l_p $ norm $ (p \in (0, 2]) $, based on the corresponding p-stable distribution, we propose a novel LSH scheme named query-aware LSH (QALSH) for c-ANN search over external memory. Our theoretical studies show that QALSH enjoys a guarantee on query quality. The use of query-aware LSH function enables QALSH to work with any approximation ratio $ c > 1 $. In addition, we propose a heuristic variant named QALSH$^+$ to improve the scalability of QALSH. Extensive experiments show that QALSH and QALSH$^+$ outperform the state-of-the-art schemes, especially in high-dimensional space. 
Specifically, by using a ratio $ c < 2 $, QALSH can achieve much better query quality.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhu:2017:GSG, author = "Qijun Zhu and Haibo Hu and Cheng Xu and Jianliang Xu and Wang-Chien Lee", title = "Geo-social group queries with minimum acquaintance constraints", journal = j-VLDB-J, volume = "26", number = "5", pages = "709--727", month = oct, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0473-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 2 16:14:05 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The prosperity of location-based social networking has paved the way for new applications of group-based activity planning and marketing. While such applications heavily rely on geo-social group queries (GSGQs), existing studies fail to produce a cohesive group in terms of user acquaintance. In this paper, we propose a new family of GSGQs with minimum acquaintance constraints. They are more appealing to users as they guarantee a worst-case acquaintance level in the result group. For efficient processing of GSGQs on large location-based social networks, we devise two social-aware spatial index structures, namely SaR-tree and SaR*-tree. The latter improves on the former by considering both spatial and social distances when clustering objects. Based on SaR-tree and SaR*-tree, novel algorithms are developed to process various GSGQs. 
Extensive experiments on real datasets Gowalla and Twitter show that our proposed methods substantially outperform the baseline algorithms under various system settings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2017:DMK, author = "Kai Zhang and Kaibo Wang and Yuan Yuan and Lei Guo and Rubao Li and Xiaodong Zhang and Bingsheng He and Jiayu Hu and Bei Hua", title = "A distributed in-memory key-value store system on heterogeneous {CPU--GPU} cluster", journal = j-VLDB-J, volume = "26", number = "5", pages = "729--750", month = oct, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0479-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Oct 2 16:14:05 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In-memory key-value stores play a critical role in many data-intensive applications to provide high-throughput and low latency data accesses. In-memory key-value stores have several unique properties that include (1) data-intensive operations demanding high memory bandwidth for fast data accesses, (2) high data parallelism and simple computing operations demanding many slim parallel computing units, and (3) a large working set. However, our experiments show that homogeneous multicore CPU systems are increasingly mismatched to the special properties of key-value stores because they do not provide massive data parallelism and high memory bandwidth; the powerful but the limited number of computing cores does not satisfy the demand of the unique data processing task; and the cache hierarchy may not well benefit to the large working set. In this paper, we present the design and implementation of Mega-KV, a distributed in-memory key-value store system on a heterogeneous CPU--GPU cluster. 
Effectively utilizing the high memory bandwidth and latency hiding capability of GPUs, Mega-KV provides fast data accesses and significantly boosts overall performance and energy efficiency over the homogeneous CPU architectures. Mega-KV shows excellent scalability and processes up to 623-million key-value operations per second on a cluster installed with eight CPUs and eight GPUs, while delivering an efficiency of up to 299-thousand operations per Watt (KOPS/W).", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2017:FIC, author = "Rong-Hua Li and Lu Qin and Jeffrey Xu Yu and Rui Mao", title = "Finding influential communities in massive networks", journal = j-VLDB-J, volume = "26", number = "6", pages = "751--776", month = dec, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0467-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Nov 10 08:53:24 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Community search is a problem of finding densely connected subgraphs that satisfy the query conditions in a network, which has attracted much attention in recent years. However, all the previous studies on community search do not consider the influence of a community. In this paper, we introduce a novel community model called k-influential community based on the concept of k-core to capture the influence of a community. Based on this community model, we propose a linear time online search algorithm to find the top-$r$ $k$-influential communities in a network. To further speed up the influential community search algorithm, we devise a linear space data structure which supports efficient search of the top-$r$ $k$-influential communities in optimal time. 
We also propose an efficient algorithm to maintain the data structure when the network is frequently updated. Additionally, we propose a novel I/O-efficient algorithm to find the top-$r$ $k$-influential communities in a disk-resident graph under the assumption of $ {\mathcal {U}} = O(n) $, where $ {\mathcal {U}} $ and n denote the size of the main memory and the number of nodes, respectively. Finally, we conduct extensive experiments on six real-world massive networks, and the results demonstrate the efficiency and effectiveness of the proposed methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ali:2017:CDP, author = "Syed Muhammad Ali and Robert Wrembel", title = "From conceptual design to performance optimization of {ETL} workflows: current state of research and open problems", journal = j-VLDB-J, volume = "26", number = "6", pages = "777--801", month = dec, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0477-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Nov 10 08:53:24 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we discuss the state of the art and current trends in designing and optimizing ETL workflows. We explain the existing techniques for: (1) constructing a conceptual and a logical model of an ETL workflow, (2) its corresponding physical implementation, and (3) its optimization, illustrated by examples. The discussed techniques are analyzed w.r.t. their advantages, disadvantages, and challenges in the context of metrics such as autonomous behavior, support for quality metrics, and support for ETL activities as user-defined functions. We draw conclusions on still open research and technological issues in the field of ETL. 
Finally, we propose a theoretical ETL framework for ETL optimization.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Fang:2017:EEA, author = "Yixiang Fang and Reynold Cheng and Yankai Chen and Siqiang Luo and Jiafeng Hu", title = "Effective and efficient attributed community search", journal = j-VLDB-J, volume = "26", number = "6", pages = "803--828", month = dec, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0482-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Nov 10 08:53:24 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Given a graph G and a vertex $ q \in G $, the community search query returns a subgraph of G that contains vertices related to q. Communities, which are prevalent in attributed graphs such as social networks and knowledge bases, can be used in emerging applications such as product advertisement and setting up of social events. In this paper, we investigate the attributed community query (or ACQ), which returns an attributed community (AC) for an attributed graph. The AC is a subgraph of G, which satisfies both structure cohesiveness (i.e., its vertices are tightly connected) and keyword cohesiveness (i.e., its vertices share common keywords). The AC enables a better understanding of how and why a community is formed (e.g., members of an AC have a common interest in music, because they all have the same keyword ``music''). An AC can be ``personalized''; for example, an ACQ user may specify that an AC returned should be related to some specific keywords like ``research'' and ``sports''. To enable efficient AC search, we develop the CL-tree index structure and three algorithms based on it. We further propose efficient algorithms for maintaining the index on dynamic graphs. 
Moreover, we study two problems that are related to the ACQ problem. We evaluate our solutions on six large graphs. Our results show that ACQ is more effective and efficient than existing community retrieval approaches. Moreover, an AC contains more precise and personalized information than that of existing community search and detection methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lu:2017:MES, author = "Wei Lu and Jiajia Hou and Ying Yan and Meihui Zhang and Xiaoyong Du and Thomas Moscibroda", title = "{MSQL}: efficient similarity search in metric spaces using {SQL}", journal = j-VLDB-J, volume = "26", number = "6", pages = "829--854", month = dec, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0481-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Nov 10 08:53:24 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Similarity search is a primitive operation that arises in a large variety of database applications. Typical examples include identifying articles with similar titles, finding similar images and music in a large digital object repository, etc. While there exist a wide spectrum of access methods for similarity queries in metric spaces, a practical solution that can be fully supported by existing RDBMS with high efficiency still remains an open problem. In this paper, we present MSQL, a practical solution for answering similarity queries in metric spaces fully using SQL. To the best of our knowledge, MSQL enables users to find similar objects by submitting SELECT-FROM-WHERE statements only. MSQL provides a uniform indexing scheme based on a standard built-in B$^+$-tree index, with the ability to accelerate the query processing using index seek. 
Various query optimization techniques are incorporated in MSQL to significantly reduce CPU and I/O cost. We deploy MSQL on top of PostgreSQL. Extensive experiments on various real data sets demonstrate MSQL's benefits, performing up to two orders of magnitude faster than existing domain-specific SQL-based solutions and being comparable to native solutions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hung:2017:AVG, author = "Nguyen Quoc Hung and Duong Chi Thang and Nguyen Thanh Tam and Matthias Weidlich and Karl Aberer and Hongzhi Yin and Xiaofang Zhou", title = "Answer validation for generic crowdsourcing tasks with minimal efforts", journal = j-VLDB-J, volume = "26", number = "6", pages = "855--880", month = dec, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0484-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Nov 10 08:53:24 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Crowdsourcing has been established as an essential means to scale human computation in diverse Web applications, reaching from data integration to information retrieval. Yet, crowd workers have wide-ranging levels of expertise. Large worker populations are heterogeneous and comprise a significant amount of faulty workers. As a consequence, quality insurance for crowd answers is commonly seen as the Achilles heel of crowdsourcing. Although various techniques for quality control have been proposed in recent years, a post-processing phase in which crowd answers are validated is still required. Such validation, however, is typically conducted by experts, whose availability is limited and whose work incurs comparatively high costs. This work aims at guiding an expert in the validation of crowd answers. 
We present a probabilistic model that helps to identify the most beneficial validation questions in terms of both improvement in result correctness and detection of faulty workers. By seeking expert feedback on the most problematic cases, we are able to obtain a set of high-quality answers, even if the expert does not validate the complete answer set. Our approach is applicable for a broad range of crowdsourcing tasks, including classification and counting. Our comprehensive evaluation using both real-world and synthetic datasets demonstrates that our techniques save up to 60\% of expert efforts compared to baseline methods when striving for perfect result correctness. In absolute terms, for most cases, we achieve close to perfect correctness after expert input has been sought for only 15\% of the crowdsourcing tasks.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Herschel:2017:SPW, author = "Melanie Herschel and Ralf Diestelk{\"a}mper and Houssem Ben Lahmar", title = "A survey on provenance: {What} for? {What} form? {What} from?", journal = j-VLDB-J, volume = "26", number = "6", pages = "881--906", month = dec, year = "2017", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0486-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Nov 10 08:53:24 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Provenance refers to any information describing the production process of an end product, which can be anything from a piece of digital data to a physical object. While this survey focuses on the former type of end product, this definition still leaves room for many different interpretations of and approaches to provenance. 
These are typically motivated by different application domains for provenance (e.g., accountability, reproducibility, process debugging) and varying technical requirements such as runtime, scalability, or privacy. As a result, we observe a wide variety of provenance types and provenance-generating methods. This survey provides an overview of the research field of provenance, focusing on what provenance is used for (what for?), what types of provenance have been defined and captured for the different applications (what form?), and which resources and system requirements impact the choice of deploying a particular provenance solution (what from?). For each of these three key questions, we provide a classification and review the state of the art for each class. We conclude with a summary and possible future research challenges.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wei:2018:RQI, author = "Hao Wei and Jeffrey Xu Yu and Can Lu and Ruoming Jin", title = "Reachability querying: an independent permutation labeling approach", journal = j-VLDB-J, volume = "27", number = "1", pages = "1--26", month = feb, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0468-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 6 18:41:42 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Reachability query is a fundamental graph operation which answers whether a vertex can reach another vertex over a large directed graph $G$ with $n$ vertices and m edges and has been extensively studied. In the literature, all the approaches compute a label for every vertex in a graph $G$ by index construction offline. The query time for answering reachability queries online is affected by the quality of the labels computed in index construction. 
The three main costs are the index construction time, the index size, and the query time. Some of the up-to-date approaches can answer reachability queries efficiently, but spend nonlinear time to construct an index. Some of the up-to-date approaches construct an index in linear time and space, but may need to depth-first search $G$ at run-time in $ O(n + m)$. In this paper, we discuss a new randomized labeling approach, named IP label, to answer reachability queries with probability guarantee, and the randomness is by independent permutation. Two additional labels are also proposed to further enhance the query processing. In addition, to deal with dynamic graphs, we discuss the label maintenance over dynamic graphs and give efficient algorithms for the labels proposed. We conduct extensive experimental studies to compare with the up-to-date approaches using 19 large real datasets used in the existing work and synthetic datasets. We confirm the efficiency and scalability of our approach in static graphs testing, and our maintenance algorithms are about one order of magnitude faster than the existing ones in dynamic graphs testing.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lin:2018:OAS, author = "Chunbin Lin and Jiaheng Lu and Zhewei Wei and Jianguo Wang and Xiaokui Xiao", title = "Optimal algorithms for selecting top-$k$ combinations of attributes: theory and applications", journal = j-VLDB-J, volume = "27", number = "1", pages = "27--52", month = feb, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0485-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 6 18:41:42 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Traditional top-$k$ algorithms, e.g., TA and NRA, have been successfully applied in many 
areas such as information retrieval, data mining and databases. They are designed to discover k objects, e.g., top-$k$ restaurants, with highest overall scores aggregated from different attributes, e.g., price and location. However, new emerging applications like query recommendation require providing the best combinations of attributes, instead of objects. The straightforward extension based on the existing top-$k$ algorithms is prohibitively expensive to answer top-$k$ combinations because they need to enumerate all the possible combinations, which is exponential to the number of attributes. In this article, we formalize a novel type of top-$k$ query, called top-$k$, m, which aims to find top-$k$ combinations of attributes based on the overall scores of the top-m objects within each combination, where m is the number of objects forming a combination. We propose a family of efficient top-$k$, m algorithms with different data access methods, i.e., sorted accesses and random accesses and different query certainties, i.e., exact query processing and approximate query processing. Theoretically, we prove that our algorithms are instance optimal and analyze the bound of the depth of accesses. We further develop optimizations for efficient query evaluation to reduce the computational and the memory costs and the number of accesses. We provide a case study on the real applications of top-$k$, m queries for an online biomedical search engine. 
Finally, we perform comprehensive experiments to demonstrate the scalability and efficiency of top-$k$, m algorithms on multiple real-life datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhao:2018:ESS, author = "Xiang Zhao and Chuan Xiao and Xuemin Lin and Wenjie Zhang and Yang Wang", title = "Efficient structure similarity searches: a partition-based approach", journal = j-VLDB-J, volume = "27", number = "1", pages = "53--78", month = feb, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0487-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 6 18:41:42 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Graphs are widely used to model complex data in many applications, such as bioinformatics, chemistry, social networks, pattern recognition. A fundamental and critical query primitive is to efficiently search similar structures in a large collection of graphs. This article mainly studies threshold-based graph similarity search with edit distance constraints. Existing solutions to the problem utilize fixed-size overlapping substructures to generate candidates, and thus become susceptible to large vertex degrees and distance thresholds. In this article, we present a partition-based approach to tackle the problem. By dividing data graphs into variable-size non-overlapping partitions, the edit distance constraint is converted to a graph containment constraint for candidate generation. We develop efficient query processing algorithms based on the novel paradigm. Moreover, candidate-pruning techniques and an improved graph edit distance verification algorithm are developed to boost the performance. In addition, a cost-aware graph partitioning method is devised to optimize the index. 
Extending the partition-based filtering paradigm, we present a solution to the top-$k$ graph similarity search problem, where tailored filtering, look-ahead and computation-sharing strategies are exploited. Using both public real-life and synthetic datasets, extensive experiments demonstrate that our approaches significantly outperform the baseline and its alternatives.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yu:2018:DSS, author = "Weiren Yu and Xuemin Lin and Wenjie Zhang and Julie A. Mccann", title = "Dynamical {SimRank} search on time-varying networks", journal = j-VLDB-J, volume = "27", number = "1", pages = "79--104", month = feb, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0488-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 6 18:41:42 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "SimRank is an appealing pair-wise similarity measure based on graph structure. It iteratively follows the intuition that two nodes are assessed as similar if they are pointed to by similar nodes. Many real graphs are large, and links are constantly subject to minor changes. In this article, we study the efficient dynamical computation of all-pairs SimRanks on time-varying graphs. Existing methods for the dynamical SimRank computation [e.g., LTSF (Shao et al. in PVLDB 8(8):838--849, 2015) and READS (Zhang et al. in PVLDB 10(5):601--612, 2017)] mainly focus on top-$k$ search with respect to a given query. For all-pairs dynamical SimRank search, Li et al.'s approach (Li et al. in EDBT, 2010) was proposed for this problem. It first factorizes the graph via a singular value decomposition (SVD) and then incrementally maintains such a factorization in response to link updates at the expense of exactness. 
As a result, all pairs of SimRanks are updated approximately, yielding $ O(r^4 n^2) $ time and $ O(r^2 n^2) $ memory in a graph with $n$ nodes, where r is the target rank of the low-rank SVD. Our solution to the dynamical computation of SimRank comprises of five ingredients: (1) We first consider edge update that does not accompany new node insertions. We show that the SimRank update $ \Delta \mathbf{S} $ in response to every link update is expressible as a rank-one Sylvester matrix equation. This provides an incremental method requiring $ O(K n^2)$ time and $ O(n^2)$ memory in the worst case to update $ n^2 $ pairs of similarities for $K$ iterations. (2) To speed up the computation further, we propose a lossless pruning strategy that captures the ``affected areas'' of $ \Delta \mathbf{S} $ to eliminate unnecessary retrieval. This reduces the time of the incremental SimRank to $ O(K(m + |{\textsf {AFF}}|))$, where $m$ is the number of edges in the old graph, and $ |{\textsf {AFF}}| (\le n^2)$ is the size of ``affected areas'' in $ \Delta S$, and in practice, $ |{\textsf {AFF}}| \ll n^2$. (3) We also consider edge updates that accompany node insertions, and categorize them into three cases, according to which end of the inserted edge is a new node. For each case, we devise an efficient incremental algorithm that can support new node insertions and accurately update the affected SimRanks. (4) We next study batch updates for dynamical SimRank computation, and design an efficient batch incremental method that handles ``similar sink edges'' simultaneously and eliminates redundant edge updates. (5) To achieve linear memory, we devise a memory-efficient strategy that dynamically updates all pairs of SimRanks column by column in just $ O(K n + m)$ memory, without the need to store all $ (n^2)$ pairs of old SimRank scores. 
Experimental studies on various datasets demonstrate that our solution substantially outperforms the existing incremental SimRank methods and is faster and more memory-efficient than its competitors on million-scale graphs.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sagi:2018:NBE, author = "Tomer Sagi and Avigdor Gal", title = "Non-binary evaluation measures for big data integration", journal = j-VLDB-J, volume = "27", number = "1", pages = "105--126", month = feb, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0489-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 6 18:41:42 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The evolution of data accumulation, management, analytics, and visualization has led to the coining of the term big data, which challenges the task of data integration. This task, common to any matching problem in computer science involves generating alignments between structured data in an automated fashion. Historically, set-based measures, based upon binary similarity matrices (match/non-match), have dominated evaluation practices of matching tasks. However, in the presence of big data, such measures no longer suffice. In this work, we propose evaluation methods for non-binary matrices as well. Non-binary evaluation is formally defined together with several new, non-binary measures using a vector space representation of matching outcome. 
We provide empirical analyses of the usefulness of non-binary evaluation and show its superiority over its binary counterparts in several problem domains.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wu:2018:SOR, author = "Yubao Wu and Xiang Zhang and Yuchen Bian and Zhipeng Cai and Xiang Lian and Xueting Liao and Fengpan Zhao", title = "Second-order random walk-based proximity measures in graph analysis: formulations and algorithms", journal = j-VLDB-J, volume = "27", number = "1", pages = "127--152", month = feb, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0490-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 6 18:41:42 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Measuring the proximity between different nodes is a fundamental problem in graph analysis. Random walk-based proximity measures have been shown to be effective and widely used. Most existing random walk measures are based on the first-order Markov model, i.e., they assume that the next step of the random surfer only depends on the current node. However, this assumption neither holds in many real-life applications nor captures the clustering structure in the graph. To address the limitation of the existing first-order measures, in this paper, we study the second-order random walk measures, which take the previously visited node into consideration. While the existing first-order measures are built on node-to-node transition probabilities, in the second-order random walk, we need to consider the edge-to-edge transition probabilities. Using incidence matrices, we develop simple and elegant matrix representations for the second-order proximity measures. 
A desirable property of the developed measures is that they degenerate to their original first-order forms when the effect of the previous step is zero. We further develop Monte Carlo methods to efficiently compute the second-order measures and provide theoretical performance guarantees. Experimental results show that in a variety of applications, the second-order measures can dramatically improve the performance compared to their first-order counterparts.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yang:2018:PPC, author = "Bin Yang and Jian Dai and Chenjuan Guo and Christian S. Jensen and Jilin Hu", title = "{PACE}: a {PAth-CEntric} paradigm for stochastic path finding", journal = j-VLDB-J, volume = "27", number = "2", pages = "153--178", month = apr, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0491-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 24 08:39:19 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the growing volumes of vehicle trajectory data, it becomes increasingly possible to capture time-varying and uncertain travel costs, e.g., travel time, in a road network. The current paradigm for doing so is edge-centric: it represents a road network as a weighted graph and splits trajectories into small fragments that fit the underlying edges to assign time-varying and uncertain weights to edges. It then applies path finding algorithms to the resulting, weighted graph. We propose a new PAth-CEntric paradigm, PACE, that targets more accurate and more efficient path cost estimation and path finding. By assigning weights to paths, PACE avoids splitting trajectories into small fragments. 
We solve two fundamental problems to establish the PACE paradigm: (i) how to compute accurately the travel cost distribution of a path and (ii) how to conduct path finding for a source---destination pair. To solve the first problem, given a departure time and a query path, we show how to select an optimal set of paths that cover the query path and such that the weights of the paths enable the most accurate joint cost distribution estimation for the query path. The joint cost distribution models well the travel cost dependencies among the edges in the query path, which in turn enables accurate estimation of the cost distribution of the query path. We solve the second problem by showing that the resulting path cost distribution estimation method satisfies an incremental property that enables the method to be integrated seamlessly into existing stochastic path finding algorithms. Further, we propose a new stochastic path finding algorithm that fully explores the improved accuracy and efficiency provided by PACE. Empirical studies with trajectory data from two different cities offer insight into the design properties of the PACE paradigm and offer evidence that PACE is accurate, efficient, and effective in real-world settings.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hu:2018:RAP, author = "Jilin Hu and Bin Yang and Chenjuan Guo and Christian S. 
Jensen", title = "Risk-aware path selection with time-varying, uncertain travel costs: a time series approach", journal = j-VLDB-J, volume = "27", number = "2", pages = "179--200", month = apr, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0494-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 24 08:39:19 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We address the problem of choosing the best paths among a set of candidate paths between the same origin---destination pair. This functionality is used extensively when constructing origin---destination matrices in logistics and flex transportation. Because the cost of a path, e.g., travel time, varies over time and is uncertain, there is generally no single best path. We partition time into intervals and represent the cost of a path during an interval as a random variable, resulting in an uncertain time series for each path. When facing uncertainties, users generally have different risk preferences, e.g., risk-loving or risk-averse, and thus prefer different paths. We develop techniques that, for each time interval, are able to find paths with non-dominated lowest costs while taking the users' risk preferences into account. We represent risk by means of utility function categories and show how the use of first-order and two kinds of second-order stochastic dominance relationships among random variables makes it possible to find all paths with non-dominated lowest costs. We report on empirical studies with large uncertain time series collections derived from a 2-year GPS data set. 
The study offers insight into the performance of the proposed techniques, and it indicates that the best techniques combine to offer an efficient and robust solution.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Su:2018:PDP, author = "Dong Su and Jianneng Cao and Ninghui Li and Min Lyu", title = "{PrivPfC}: differentially private data publication for classification", journal = j-VLDB-J, volume = "27", number = "2", pages = "201--223", month = apr, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0492-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 24 08:39:19 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we tackle the problem of constructing a differentially private synopsis for the classification analysis. Several state-of-the-art methods follow the structure of existing classification algorithms and are all iterative, which is suboptimal due to the locally optimal choices and division of the privacy budget among many sequentially composed steps. We propose PrivPfC, a new differentially private method for releasing data for classification. The key idea underlying PrivPfC is to privately select, in a single step, a grid, which partitions the data domain into a number of cells. This selection is done by using the exponential mechanism with a novel quality function, which maximizes the expected number of correctly classified records by a histogram classifier. PrivPfC supports both the binary classification and the multiclass classification. 
Through extensive experiments on real datasets, we demonstrate PrivPfC's superiority over the state-of-the-art methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2018:AKS, author = "Dongxiang Zhang and Yuchen Li and Xin Cao and Jie Shao and Heng Tao Shen", title = "Augmented keyword search on spatial entity databases", journal = j-VLDB-J, volume = "27", number = "2", pages = "225--244", month = apr, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0497-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 24 08:39:19 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we propose a new type of query that augments the spatial keyword search with an additional boolean expression constraint. The query is issued against a corpus of structured or semi-structured spatial entities and is very useful in applications like mobile search and targeted location-aware advertising. We devise three types of indexing and filtering strategies. First, we utilize the hybrid IR$^2$-tree and propose a novel hashing scheme for efficient pruning. Second, we propose an inverted index-based solution, named BE-Inv, that is more cache conscious and exhibits great pruning power for boolean expression matching. Our third method, named SKB-Inv, adopts a novel two-level partitioning scheme to organize the spatial entities into inverted lists and effectively facilitate the pruning in the spatial, textual, and boolean expression dimensions. In addition, we propose an adaptive query processing strategy that takes into account the selectivity of query keywords and predicates for early termination.
We conduct our experiments using two real datasets with 3.5 million Foursquare venues and 50 million Twitter geo-profiles. The results show that the methods based on inverted index are superior to the hybrid {IR}$^2$-tree; and SKB-Inv achieves the best performance.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Deutch:2018:EPT, author = "Daniel Deutch and Amir Gilad and Yuval Moskovitch", title = "Efficient provenance tracking for datalog using top-$k$ queries", journal = j-VLDB-J, volume = "27", number = "2", pages = "245--269", month = apr, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0496-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 24 08:39:19 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Highly expressive declarative languages, such as datalog, are now commonly used to model the operational logic of data-intensive applications. The typical complexity of such datalog programs, and the large volume of data that they process, call for result explanation. Results may be explained through the tracking and presentation of data provenance, defined here as the set of derivation trees of a given fact. While informative, the size of such full provenance information is typically too large and complex (even when compactly represented) to allow displaying it to the user. To this end, we propose a novel top-k query language for querying datalog provenance, supporting selection criteria based on tree patterns and ranking based on the rules and database facts used in derivation. We propose an efficient novel algorithm that computes in polynomial data complexity a compact representation of the top-k trees which may be explicitly constructed in linear time with respect to their size. 
We further experimentally study the algorithm performance, showing its scalability even for complex datalog programs where full provenance tracking is infeasible.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhou:2018:ARQ, author = "Junfeng Zhou and Jeffrey Xu Yu and Na Li and Hao Wei and Ziyang Chen and Xian Tang", title = "Accelerating reachability query processing based on {$ \vec {\rm DAG} $} reduction", journal = j-VLDB-J, volume = "27", number = "2", pages = "271--296", month = apr, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0495-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Mar 24 08:39:19 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Answering reachability queries is one of the fundamental graph operations. The existing approaches build indexes and answer reachability queries on a directed acyclic graph (DAG) $G$, which is constructed by coalescing each strongly connected component of the given directed graph $ \mathcal {G} $ into a node of $G$. Considering that $G$ can still be large to be processed efficiently, there are studies to further reduce $G$ to a smaller graph. However, these approaches suffer from either inefficiency in answering reachability queries, or cannot scale to large graphs. In this paper, we study DAG reduction to accelerate reachability query processing, which reduces the size of $G$ by computing transitive reduction (TR) followed by computing equivalence reduction (ER). For TR, we propose a bottom-up algorithm, namely buTR, which removes from $G$ all redundant edges to get the unique smallest DAG $ G^t $ satisfying that $ G^t $ has the same transitive closure as that of $G$. For ER, we propose a divide-and-conquer algorithm, namely linear-ER.
Given the result $ G^t $ of TR, linear-ER gets a smaller DAG $ G^\varepsilon $ in linear time based on equivalence relationship between nodes in $G$. Our DAG reduction approaches (TR and ER) significantly improve the cost of time and space and can be scaled to large graphs. Based on the result of DAG reduction, we further propose a graph decomposition-based algorithm to efficiently answer reachability queries. We confirm the efficiency of our approaches by extensive experimental studies for TR, ER, and reachability query processing using 20 real datasets. The complete source code is available for download at https://pan.baidu.com/s/1skHBXXN.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Belesiotis:2018:STU, author = "Alexandros Belesiotis and Dimitrios Skoutas and Christodoulos Efstathiades and Vassilis Kaffes and Dieter Pfoser", title = "Spatio-textual user matching and clustering based on set similarity joins", journal = j-VLDB-J, volume = "27", number = "3", pages = "297--320", month = jun, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0498-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Jun 8 17:24:12 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper addresses the problem of matching and clustering users based on their geolocated posts. Individual posts are matched according to spatial distance and textual similarity thresholds. Then, user similarity is defined as the ratio of their posts that match each other. Based on these criteria, we introduce efficient algorithms for identifying pairs of matching users in a large dataset, as well as for computing the top-k matching pairs. We then proceed to identify spatio-textual user clusters.
For this purpose, we use the Louvain method for community detection. Our algorithms operate on a user graph where edge weights represent spatio-textual user similarities. Since the exact user similarity graph can be prohibitively expensive to compute, we exploit our previous algorithms to derive efficient methods that reduce execution time both by avoiding to compute exact similarity scores and by reducing the number of similarity calculations performed. The presented solution allows a trade-off between computation time and quality of detected clusters. The proposed algorithms are evaluated using three real-world datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2018:GSG, author = "Lei Li and Kai Zheng and Sibo Wang and Wen Hua and Xiaofang Zhou", title = "Go slow to go fast: minimal on-road time route scheduling with parking facilities using historical trajectory", journal = j-VLDB-J, volume = "27", number = "3", pages = "321--345", month = jun, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0499-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Jun 8 17:24:12 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "For thousands of years, people have been innovating new technologies to make their travel faster, the latest of which is GPS technology that is used by millions of drivers every day. The routes recommended by a GPS device are computed by path planning algorithms (e.g., fastest path algorithm), which aim to minimize a certain objective function (e.g., travel time) under the current traffic condition. 
When the objective is to arrive the destination as early as possible, waiting during travel is not an option as it will only increase the total travel time due to the First-In-First-Out property of most road networks. However, some businesses such as logistics companies are more interested in optimizing the actual on-road time of their vehicles (i.e., while the engine is running) since it is directly related to the operational cost. At the same time, the drivers' trajectories, which can reveal the traffic conditions on the roads, are also collected by various service providers. Compared to the existing speed profile generation methods, which mainly rely on traffic monitor systems, the trajectory-based method can cover a much larger space and is much cheaper and flexible to obtain. This paper proposes a system, which has an online component and an offline component, to solve the minimal on-road time problem using the trajectories. The online query answering component studies how parking facilities along the route can be leveraged to avoid predicted traffic jam and eventually reduce the drivers' on-road time, while the offline component solves how to generate speed profiles of a road network from historical trajectories. The challenging part of the routing problem of the online component lies in the computational complexity when determining if it is beneficial to wait on specific parking places and the time of waiting to maximize the benefit. To cope with this challenging problem, we propose two efficient algorithms using minimum on-road travel cost function to answer the query. We further introduce several approximation methods to speed up the query answering, with an error bound guaranteed. The offline speed profile generation component makes use of historical trajectories to provide the traveling time for the online component. 
Extensive experiments show that our method is more efficient and accurate than baseline approaches extended from the existing path planning algorithms, and our speed profile is accurate and space efficient.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yao:2018:SDT, author = "Chang Yao and Meihui Zhang and Qian Lin and Beng Chin Ooi and Jiatao Xu", title = "Scaling distributed transaction processing and recovery based on dependency logging", journal = j-VLDB-J, volume = "27", number = "3", pages = "347--368", month = jun, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0500-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Jun 8 17:24:12 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Dependency graph-based concurrency control (DGCC) protocol has been shown to achieve good performance on multi-core in-memory system. DGCC separates contention resolution from the transaction execution and employs dependency graphs to derive serializable transaction schedules. However, distributed transactions complicate the dependency resolution, and therefore, an effective transaction partitioning strategy is essential to reduce expensive multi-node distributed transactions. During failure recovery, log must be examined from the last checkpoint onward and the affected transactions are re-executed based on the way they are partitioned and executed. Existing approaches treat both transaction management and recovery as two separate problems, even though recovery is dependent on the sequence in which transactions are executed. In this paper, we propose to treat the transaction management and recovery problems as one.
We first propose an efficient distributed dependency graph-based concurrency control (DistDGCC) protocol for handling transactions spanning multiple nodes and propose a new novel and efficient logging protocol called dependency logging that also makes use of dependency graphs for efficient logging and recovery. DistDGCC optimizes the average cost for each distributed transaction by processing transactions in batches. Moreover, it also reduces the effects of thread blocking caused by distributed transactions and consequently improves the runtime performance. Further, dependency logging exploits the same data structure that is used by DistDGCC to reduce the logging overhead, as well as the logical dependency information to improve the recovery parallelism. Extensive experiments are conducted to evaluate the performance of our proposed technique against state-of-the-art techniques. Experimental results show that DistDGCC is efficient and scalable, and dependency logging supports fast recovery with marginal runtime overhead. 
Hence, the overall system performance is significantly improved as a result.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chodpathumwan:2018:CEC, author = "Yodsawalai Chodpathumwan and Ali Vakilian and Arash Termehchy and Amir Nayyeri", title = "Cost-effective conceptual design using taxonomies", journal = j-VLDB-J, volume = "27", number = "3", pages = "369--394", month = jun, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0501-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Jun 8 17:24:12 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "It is known that annotating entities in unstructured and semi-structured datasets by their concepts improves the effectiveness of answering queries over these datasets. Ideally, one would like to annotate entities of all relevant concepts in a dataset. However, it takes substantial time and computational resources to annotate concepts in large datasets, and an organization may have sufficient resources to annotate only a subset of relevant concepts. Clearly, it would like to annotate a subset of concepts that provides the most effective answers to queries over the dataset. We propose a formal framework that quantifies the amount by which annotating entities of concepts from a taxonomy in a dataset improves the effectiveness of answering queries over the dataset. Because the problem is $ \mathbf {NP} $-hard, we propose efficient approximation and pseudo-polynomial time algorithms for several cases of the problem.
Our extensive empirical studies validate our framework and show accuracy and efficiency of our algorithms.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Shang:2018:PTS, author = "Shuo Shang and Lisi Chen and Zhewei Wei and Christian S. Jensen and Kai Zheng and Panos Kalnis", title = "Parallel trajectory similarity joins in spatial networks", journal = j-VLDB-J, volume = "27", number = "3", pages = "395--420", month = jun, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0502-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Jun 8 17:24:12 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The matching of similar pairs of objects, called similarity join, is fundamental functionality in data management. We consider two cases of trajectory similarity joins (TS-Joins), including a threshold-based join (Tb-TS-Join) and a top-k TS-Join (k-TS-Join), where the objects are trajectories of vehicles moving in road networks. Given two sets of trajectories and a threshold $ \theta $, the Tb-TS-Join returns all pairs of trajectories from the two sets with similarity above $ \theta $. In contrast, the k-TS-Join does not take a threshold as a parameter, and it returns the top-k most similar trajectory pairs from the two sets. The TS-Joins target diverse applications such as trajectory near-duplicate detection, data cleaning, ridesharing recommendation, and traffic congestion prediction. With these applications in mind, we provide purposeful definitions of similarity. To enable efficient processing of the TS-Joins on large sets of trajectories, we develop search space pruning techniques and enable use of the parallel processing capabilities of modern processors.
Specifically, we present a two-phase divide-and-conquer search framework that lays the foundation for the algorithms for the Tb-TS-Join and the k-TS-Join that rely on different pruning techniques to achieve efficiency. For each trajectory, the algorithms first find similar trajectories. Then they merge the results to obtain the final result. The algorithms for the two joins exploit different upper and lower bounds on the spatiotemporal trajectory similarity and different heuristic scheduling strategies for search space pruning. Their per-trajectory searches are independent of each other and can be performed in parallel, and the mergings have constant cost. An empirical study with real data offers insight in the performance of the algorithms and demonstrates that they are capable of outperforming well-designed baseline algorithms by an order of magnitude.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lee:2018:PRA, author = "Juchang Lee and Wook-Shin Han and Hyoung Jun Na and Chang Gyoo Park and Kyu Hwan Kim and Deok Hoe Kim and Joo Yeon Lee and Sang Kyun Cha and Seunghyun Moon", title = "Parallel replication across formats for scaling out mixed {OLTP\slash OLAP} workloads in main-memory databases", journal = j-VLDB-J, volume = "27", number = "3", pages = "421--444", month = jun, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0503-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Jun 8 17:24:12 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Modern in-memory database systems are facing the need of efficiently supporting mixed workloads of OLTP and OLAP. 
A conventional approach to this requirement is to rely on ETL-style, application-driven data replication between two very different OLTP and OLAP systems, sacrificing real-time reporting on operational data. An alternative approach is to run OLTP and OLAP workloads in a single machine, which eventually limits the maximum scalability. In order to tackle this challenging problem, we propose a novel database replication architecture called HANA Asynchronous Parallel Table Replication (ATR). ATR supports OLTP workloads in one primary machine, while it supports heavy OLAP workloads in replicas. Here, row store formats can be used for OLTP transactions at the primary, while column store formats are used for OLAP analytical queries at the replicas. ATR is designed to support elastic scalability of OLAP query performance, while it minimizes the overhead for transaction processing at the primary and minimizes CPU consumption for replayed transactions at the replicas. ATR employs a novel optimistic lock-free parallel log replay scheme which exploits characteristics of multi-version concurrency control (MVCC) to enable real-time reporting by minimizing the propagation delay between the primary and replicas. It supports adaptive query routing depending on its predefined acceptable staleness range. Through extensive experiments with a concrete implementation available in a commercial product, we demonstrate that ATR achieves sub-second visibility delay even for update-intensive workloads, providing scalable OLAP performance without notable overhead to the primary. 
In addition, with extension of ATR to eager parallel replication, we demonstrate how the parallel log replay and its log-less replica recovery mechanisms improve run-time transaction performance under eager replication.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Choudhury:2018:FOL, author = "Farhana Murtaza Choudhury and J. Shane Culpepper and Zhifeng Bao and Timos Sellis", title = "Finding the optimal location and keywords in obstructed and unobstructed space", journal = j-VLDB-J, volume = "27", number = "4", pages = "445--470", month = aug, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0504-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Sep 8 07:39:26 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The problem of optimal location selection based on reverse k nearest neighbor (R$k$NN) queries has been extensively studied in spatial databases. In this work, we present a related query, denoted as a Maximized Bichromatic Reverse Spatial Textual k Nearest Neighbor (MaxST) query, that finds an optimal location and a set of keywords for an object so that the object is a $k$NN object for as many users as possible. Such a query has many practical applications including advertisements, where the query is to find the location and the text contents to include in an advertisement so that it is relevant to the maximum number of users. The visibility of the advertisements also has an important role in the users' interests. In this work, we address two instances of the spatial relevance when ranking items: (1) the Euclidean distance and (2) the visibility. We carefully design a series of index structures and approaches to answer the MaxST for both instances.
Specifically, we present (1) the Grp-topk approach that requires the computation of the top-k objects for all of the users first and then applies various pruning techniques to find the optimal location and keywords; (2) the Indiv-U approach, where we use similarity estimations to avoid computing the top-k objects of the users that cannot be a final result; and (3) the Index-U approach where we propose a hierarchical index structure over the users to improve pruning. We show that the keyword selection component in MaxST queries is NP-hard and present both approximate and exact solutions for the problem.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yang:2018:ESC, author = "Jianye Yang and Wenjie Zhang and Shiyu Yang and Ying Zhang and Xuemin Lin and Long Yuan", title = "Efficient set containment join", journal = j-VLDB-J, volume = "27", number = "4", pages = "471--495", month = aug, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0505-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Sep 8 07:39:26 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "In this paper, we study the problem of set containment join. Given two collections $ \mathcal {R} $ and $ \mathcal {S} $ of records, the set containment join $ \mathcal {R} \bowtie_\subseteq \mathcal {S} $ retrieves all record pairs $ \{ (r, s) \} \in \mathcal {R} \times \mathcal {S} $ such that $ r \subseteq s $. This problem has been extensively studied in the literature and has many important applications in commercial and scientific fields. Recent research focuses on the in-memory set containment join algorithms, and several techniques have been developed following intersection-oriented or union-oriented computing paradigms.
Nevertheless, we observe that two computing paradigms have their limits due to the nature of the intersection and union operators. Particularly, intersection-oriented method relies on the intersection of the relevant inverted lists built on the elements of $ \mathcal {S} $. A nice property of the intersection-oriented method is that the join computation is verification free. However, the number of records explored during the join process may be large because there are multiple replicas for each record in $ \mathcal {S} $. On the other hand, the union-oriented method generates a signature for each record in $ \mathcal {R} $ and the candidate pairs are obtained by the union of the inverted lists of the relevant signatures. The candidate size of the union-oriented method is usually small because each record contributes only one replica in the index. Unfortunately, union-oriented method needs to verify the candidate pairs, which may be cost expensive especially when the join result size is large. As a matter of fact, the state-of-the-art union-oriented solution is not competitive compared to the intersection-oriented ones. In this paper, we propose a new union-oriented method, namely TT-Join, which not only enhances the advantage of the previous union-oriented methods but also integrates the goodness of intersection-oriented methods by imposing a variant of prefix tree structure. We conduct extensive experiments on 20 real-life datasets and synthetic datasets by comparing our method with 7 existing methods. The experiment results demonstrate that TT-Join significantly outperforms the existing algorithms on most of the datasets and can achieve up to two orders of magnitude speedup. Furthermore, to support large scale of datasets, we extend our techniques to distributed systems on top of MapReduce framework. 
With the help of careful designed load-aware distribution mechanisms, our distributed join algorithm can achieve up to an order of magnitude speedup than the baselines methods.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hao:2018:DRU, author = "Shuang Hao and Nan Tang and Guoliang Li and Jian Li and Jianhua Feng", title = "Distilling relations using knowledge bases", journal = j-VLDB-J, volume = "27", number = "4", pages = "497--519", month = aug, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0506-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Sep 8 07:39:26 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Given a relational table, we study the problem of detecting and repairing erroneous data, as well as marking correct data, using well curated knowledge bases (KBs). We propose detective rules (DRs), a new type of data cleaning rules that can make actionable decisions on relational data, by building connections between a relation and a KB. The main invention is that a DR simultaneously models two opposite semantics of an attribute belonging to a relation using types and relationships in a KB: The positive semantics explains how its value should be linked to other attribute values in a correct tuple, and the negative semantics indicate how a wrong attribute value is connected to other correct attribute values within the same tuple. Naturally, a DR can mark correct values in a tuple if it matches the positive semantics. Meanwhile, a DR can detect/repair an error if it matches the negative semantics. We study fundamental problems associated with DRs, e.g., rule consistency and rule implication. 
We present efficient algorithms to apply DRs to clean a relation, based on rule order selection and inverted indexes. Moreover, we discuss approaches on how to generate DRs from examples. Extensive experiments, using both real-world and synthetic datasets, verify the effectiveness and efficiency of applying DRs in practice.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Borovica-Gajic:2018:SSR, author = "Renata Borovica-Gajic and Stratos Idreos and Anastasia Ailamaki and Marcin Zukowski and Campbell Fraser", title = "{Smooth Scan}: robust access path selection without cardinality estimation", journal = j-VLDB-J, volume = "27", number = "4", pages = "521--545", month = aug, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0507-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Sep 8 07:39:26 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Query optimizers depend heavily on statistics representing column distributions to create good query plans. In many cases, though, statistics are outdated or nonexistent, and the process of refreshing statistics is very expensive, especially for ad hoc workloads on ever bigger data. This results in suboptimal plans that severely hurt performance. The core of the problem is the fixed decision on the type of physical operators that comprise a query plan. This paper makes a case for continuous adaptation and morphing of physical operators throughout their lifetime, by adjusting their behavior in accordance with the observed statistical properties of the data at run time. We demonstrate the benefits of the new paradigm by designing and implementing an adaptive access path operator called Smooth Scan, which morphs continuously within the space of index access and full table scan. 
Smooth Scan behaves similarly to an index scan for low selectivity; if selectivity increases, however, Smooth Scan progressively morphs its behavior toward a sequential scan. As a result, a system with Smooth Scan requires no optimization decisions on the access paths up front. Additionally, by depending only on the result distribution and eschewing statistics and cardinality estimates altogether, Smooth Scan ensures repeatable execution across multiple query invocations. Smooth Scan implemented in PostgreSQL demonstrates robust, near-optimal performance on micro-benchmarks and real-life workloads, while being statistics oblivious at the same time.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Herrmann:2018:MSV, author = "Kai Herrmann and Hannes Voigt and Torben Bach Pedersen and Wolfgang Lehner", title = "Multi-schema-version data management: data independence in the twenty-first century", journal = j-VLDB-J, volume = "27", number = "4", pages = "547--571", month = aug, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0508-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Sep 8 07:39:26 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Agile software development allows us to continuously evolve and run a software system. However, this is not possible in databases, as established methods are very expensive, error-prone, and far from agile. We present InVerDa, a multi-schema-version database management system (MSVDB) for agile database development. MSVDBs realize co-existing schema versions within one database, where each schema version behaves like a regular single-schema database and write operations are propagated between schema versions. 
Developers use a relationally complete and bidirectional database evolution language (BiDEL) to easily evolve existing schema versions to new ones. BiDEL scripts are more robust, orders of magnitude shorter, and cause only a small performance overhead compared to handwritten SQL scripts. We formally guarantee data independence: no matter how the data of the co-existing schema versions is physically materialized, each schema version is guaranteed to behave like a regular database. Since, the chosen physical materialization significantly determines the overall performance, we equip database administrators with an advisor that proposes an optimized materialization for the current workload, which can improve the performance by orders of magnitude compared to na{\"\i}ve solutions. To our best knowledge, we are the first to facilitate agile evolution of production databases with full support of co-existing schema versions and formally guaranteed data independence.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Szlichta:2018:ECD, author = "Jaroslaw Szlichta and Parke Godfrey and Lukasz Golab and Mehdi Kargar and Divesh Srivastava", title = "Effective and complete discovery of bidirectional order dependencies via set-based axioms", journal = j-VLDB-J, volume = "27", number = "4", pages = "573--591", month = aug, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0510-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Sep 8 07:39:26 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Integrity constraints (ICs) are useful for expressing and enforcing application semantics. Formulating ICs manually, however, requires domain expertise, is prone to human error, and can be exceedingly time-consuming. 
Thus, methods for automatic discovery have been developed for some classes of ICs, such as functional dependencies (FDs), and recently, order dependencies (ODs). ODs properly subsume FDs and can express business rules involving order; e.g., an employee who pays higher taxes has a higher salary than another employee. Bidirectional ODs further allow different ordering directions, ascending and descending, as in SQL's order-by; e.g., a student with an alphabetically lower letter grade has a higher percentage grade than another student. We address the limitations of prior work on automatic OD discovery, which has factorial complexity, is incomplete, and is not concise. We present an efficient bidirectional OD discovery algorithm enabled by a novel polynomial mapping to a canonical form, and a sound and complete set of axioms for canonical bidirectional ODs to prune the search space. Our algorithm has exponential worst-case time complexity in the number of attributes and linear complexity in the number of tuples. We prove that it produces a complete and minimal set of bidirectional ODs, and we experimentally show orders of magnitude performance improvements over the prior state-of-the-art methodologies.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chaudhuri:2018:SIB, author = "Surajit Chaudhuri and Jayant R. 
Haritsa", title = "Special issue on best papers of {VLDB 2016}", journal = j-VLDB-J, volume = "27", number = "5", pages = "593--594", month = oct, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0520-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Oct 4 06:40:44 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Interlandi:2018:ADP, author = "Matteo Interlandi and Ari Ekmekji and Kshitij Shah and Muhammad Ali Gulzar and Sai Deep Tetali and Miryung Kim and Todd Millstein and Tyson Condie", title = "Adding data provenance support to {Apache Spark}", journal = j-VLDB-J, volume = "27", number = "5", pages = "595--615", month = oct, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0474-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Oct 4 06:40:44 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Debugging data processing logic in data-intensive scalable computing (DISC) systems is a difficult and time-consuming effort. Today's DISC systems offer very little tooling for debugging programs, and as a result, programmers spend countless hours collecting evidence (e.g., from log files) and performing trial-and-error debugging. To aid this effort, we built Titian, a library that enables data provenance---tracking data through transformations---in Apache Spark. Data scientists using the Titian Spark extension will be able to quickly identify the input data at the root cause of a potential bug or outlier result.
Titian is built directly into the Spark platform and offers data provenance support at interactive speeds---orders of magnitude faster than alternative solutions---while minimally impacting Spark job performance; observed overheads for capturing data lineage rarely exceed 30\% above the baseline job execution time.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Eich:2018:EGQ, author = "Marius Eich and Pit Fender and Guido Moerkotte", title = "Efficient generation of query plans containing group-by, join, and groupjoin", journal = j-VLDB-J, volume = "27", number = "5", pages = "617--641", month = oct, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0476-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Oct 4 06:40:44 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "It has been a recognized fact for many years that query execution can benefit from pushing grouping operators down in the operator tree and applying them before a join. This so-called eager aggregation reduces the size(s) of the join argument(s), making join evaluation faster. Lately, the idea enjoyed a revival when it was applied to outer joins for the first time and incorporated in a state-of-the-art plan generator. However, the recent approach is highly dependent on the use of heuristics because of the exponential growth of the search space that goes along with eager aggregation. Finding an optimal solution for larger queries calls for effective optimality-preserving pruning mechanisms to reduce the search space size as far as possible. By a more thorough investigation of functional dependencies and keys, we provide a set of new pruning criteria and extend the idea of eager aggregation further by combining it with the introduction of groupjoins.
We evaluate the resulting plan generator with respect to runtime and memory consumption.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Leis:2018:QOT, author = "Viktor Leis and Bernhard Radke and Andrey Gubichev and Atanas Mirchev and Peter Boncz and Alfons Kemper and Thomas Neumann", title = "Query optimization through the looking glass, and what we found running the {Join Order Benchmark}", journal = j-VLDB-J, volume = "27", number = "5", pages = "643--668", month = oct, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0480-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Oct 4 06:40:44 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Finding a good join order is crucial for query performance. In this paper, we introduce the Join Order Benchmark that works on real-life data riddled with correlations and introduces 113 complex join queries. We experimentally revisit the main components in the classic query optimizer architecture using a complex, real-world data set and realistic multi-join queries. For this purpose, we describe cardinality-estimate injection and extraction techniques that allow us to compare the cardinality estimators of multiple industrial SQL implementations on equal footing, and to characterize the value of having perfect cardinality estimates. Our investigation shows that all industrial-strength cardinality estimators routinely produce large errors: though cardinality estimation using table samples solves the problem for single-table queries, there are still no techniques in industrial systems that can deal accurately with join-crossing correlated query predicates. 
We further show that while estimates are essential for finding a good join order, query performance is unsatisfactory if the query engine relies too heavily on these estimates. Using another set of experiments that measure the impact of the cost model, we find that it has much less influence on query performance than the cardinality estimates. We investigate plan enumeration techniques comparing exhaustive dynamic programming with heuristic algorithms and find that exhaustive enumeration improves performance despite the suboptimal cardinality estimates. Finally, we extend our investigation from main-memory only, to also include disk-based query processing. Here, we find that though accurate cardinality estimation should be the first priority, other aspects such as modeling random versus sequential I/O are also important to predict query runtime.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Makreshanski:2018:MQJ, author = "Darko Makreshanski and Georgios Giannikis and Gustavo Alonso and Donald Kossmann", title = "Many-query join: efficient shared execution of relational joins on modern hardware", journal = j-VLDB-J, volume = "27", number = "5", pages = "669--692", month = oct, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0475-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Oct 4 06:40:44 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Database architectures typically process queries one at a time, executing concurrent queries in independent execution contexts. Often, such a design leads to unpredictable performance and poor scalability. One approach to circumvent the problem is to take advantage of sharing opportunities across concurrently running queries. 
In this paper, we propose many-query join (MQJoin), a novel method for sharing the execution of a join that can efficiently deal with hundreds of concurrent queries. This is achieved by minimizing redundant work and making efficient use of main-memory bandwidth and multi-core architectures. Compared to existing proposals, MQJoin is able to efficiently handle larger workloads regardless of the schema by exploiting more sharing opportunities. We also compared MQJoin to two commercial main-memory column-store databases. For a TPC-H-based workload, we show that MQJoin provides $ 2 $--$ 5 \times $ higher throughput with significantly more stable response times.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Brucato:2018:PQE, author = "Matteo Brucato and Azza Abouzied and Alexandra Meliou", title = "Package queries: efficient and scalable computation of high-order constraints", journal = j-VLDB-J, volume = "27", number = "5", pages = "693--718", month = oct, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0483-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Oct 4 06:40:44 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Traditional database queries follow a simple model: they define constraints that each tuple in the result must satisfy. This model is computationally efficient, as the database system can evaluate the query conditions on each tuple individually. However, many practical, real-world problems require a collection of result tuples to satisfy constraints collectively, rather than individually. In this paper, we present package queries, a new query model that extends traditional database queries to handle complex constraints and preferences over answer sets.
We develop a full-fledged package query system, implemented on top of a traditional database engine. Our work makes several contributions. (1) We design PaQL, a SQL-based query language that supports the declarative specification of package queries. We prove that PaQL is at least as expressive as integer linear programming, and therefore, evaluation of package queries is NP-hard. (2) We present a fundamental evaluation strategy that combines the capabilities of databases and constraint optimization solvers to derive solutions to package queries. The core of our approach is a set of translation rules that transform a package query to an integer linear program. (3) We introduce an offline data partitioning strategy allowing query evaluation to scale to large data sizes. (4) We introduce SketchRefine, a scalable algorithm for package evaluation, with strong approximation guarantees [$ (1 \pm \varepsilon)^6 $-factor approximation]. (5) We present a method for parallelizing the Refine phase of SketchRefine. (6) We present an empirical study of the efficiency gains of providing integer solvers with starting solutions. (7) We present extensive experiments over real-world and benchmark data. The results demonstrate that our methods are effective at deriving high-quality package results and achieve runtime performance that is an order of magnitude faster than directly using ILP solvers over large datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Elgohary:2018:CLA, author = "Ahmed Elgohary and Matthias Boehm and Peter J. Haas and Frederick R.
Reiss and Berthold Reinwald", title = "Compressed linear algebra for large-scale machine learning", journal = j-VLDB-J, volume = "27", number = "5", pages = "719--744", month = oct, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0478-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Oct 4 06:40:44 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Large-scale machine learning algorithms are often iterative, using repeated read-only data access and I/O-bound matrix--vector multiplications to converge to an optimal model. It is crucial for performance to fit the data into single-node or distributed main memory and enable fast matrix--vector operations on in-memory data. General-purpose, heavy- and lightweight compression techniques struggle to achieve both good compression ratios and fast decompression speed to enable block-wise uncompressed operations. Therefore, we initiate work --- inspired by database compression and sparse matrix formats --- on value-based compressed linear algebra (CLA), in which heterogeneous, lightweight database compression techniques are applied to matrices, and then linear algebra operations such as matrix--vector multiplication are executed directly on the compressed representation. We contribute effective column compression schemes, cache-conscious operations, and an efficient sampling-based compression algorithm. Our experiments show that CLA achieves in-memory operations performance close to the uncompressed case and good compression ratios, which enables fitting substantially larger datasets into available memory. 
We thereby obtain significant end-to-end performance improvements up to $ 9.2 \times $.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chai:2018:POB, author = "Chengliang Chai and Guoliang Li and Jian Li and Dong Deng and Jianhua Feng", title = "A partial-order-based framework for cost-effective crowdsourced entity resolution", journal = j-VLDB-J, volume = "27", number = "6", pages = "745--770", month = dec, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0509-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Crowdsourced entity resolution has recently attracted significant attentions because it can harness the wisdom of crowd to improve the quality of entity resolution. However, existing techniques either cannot achieve high quality or incur huge monetary costs. To address these problems, we propose a cost-effective crowdsourced entity resolution framework, which significantly reduces the monetary cost while keeping high quality. We first define a partial order on the pairs of records. Then, we select a pair as a question and ask the crowd to check whether the records in the pair refer to the same entity. After getting the answer of this pair, we infer the answers of other pairs based on the partial order. Next, we iteratively select pairs without answers to ask until we get the answers of all pairs. We devise effective algorithms to judiciously select the pairs to ask in order to minimize the number of asked pairs. To further reduce the cost, we propose a grouping technique to group the pairs and we only ask one pair instead of all pairs in each group. 
We develop error-tolerant techniques to tolerate the errors introduced by the partial order and the crowd. We also study the budget-aware entity resolution, which, given a budget, finds the maximum number of matching pairs within the budget, and propose effective optimization techniques. Experimental results show that our method reduces the cost to 1.25\% of existing approaches (or existing approaches take $ 80 \times $ monetary cost of our method) while not sacrificing the quality.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Roblot:2018:PCC, author = "Tania Roblot and Miika Hannula and Sebastian Link", title = "Probabilistic Cardinality Constraints", journal = j-VLDB-J, volume = "27", number = "6", pages = "771--795", month = dec, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0511-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Probabilistic databases address the requirements of applications that produce large collections of uncertain data. They should provide declarative means to control the integrity of data. Cardinality constraints, in particular, control the occurrences of data patterns by declaring in how many records a combination of data values can occur. We propose cardinality constraints on probabilistic data, which stipulate lower bounds on the marginal probability by which a cardinality constraint holds. We investigate limits and opportunities for automating their use in integrity control. This includes hardness results for their validation, axiomatic and efficient algorithmic characterisations of their implication problem, and an algorithm that computes succinct semantic summaries for any collection of these constraints.
Experiments complement our theoretical analysis on the time and space complexity of computing semantic summaries, suggesting that their computation provides the basis to acquire meaningful constraints. We also establish evidence that probabilistic functional and inclusion dependencies cannot be managed as simply as probabilistic cardinality constraints.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bress:2018:GCC, author = "Sebastian Bre{\ss} and Bastian K{\"o}cher and Henning Funke and Steffen Zeuch and Tilmann Rabl and Volker Markl", title = "Generating custom code for efficient query execution on heterogeneous processors", journal = j-VLDB-J, volume = "27", number = "6", pages = "797--822", month = dec, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0512-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Processor manufacturers build increasingly specialized processors to mitigate the effects of the power wall in order to deliver improved performance. Currently, database engines have to be manually optimized for each processor which is a costly and error-prone process. In this paper, we propose concepts to adapt to and to exploit the performance enhancements of modern processors automatically. Our core idea is to create processor-specific code variants and to learn a well-performing code variant for each processor. These code variants leverage various parallelization strategies and apply both generic- and processor-specific code transformations. Our experimental results show that the performance of code variants may diverge up to two orders of magnitude. In order to achieve peak performance, we generate custom code for each processor.
We show that our approach finds an efficient custom code variant for multi-core CPUs, GPUs, and MICs.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zoumpatianos:2018:GDS, author = "Kostas Zoumpatianos and Yin Lou and Ioana Ileana and Themis Palpanas and Johannes Gehrke", title = "Generating data series query workloads", journal = j-VLDB-J, volume = "27", number = "6", pages = "823--846", month = dec, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0513-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Data series (including time series) has attracted lots of interest in recent years. Most of the research has focused on how to efficiently support similarity or nearest neighbor queries over large data series collections (an important data mining task), and several data series summarization and indexing methods have been proposed in order to solve this problem. Up to this point, very little attention has been paid to properly evaluating such index structures, with most previous works relying solely on randomly selected data series to use as queries. In this work, we show that random workloads are inherently not suitable for the task at hand and we argue that there is a need for carefully generating query workloads. We define measures that capture the characteristics of queries, and we propose a method for generating workloads with the desired properties, that is, effectively evaluating and comparing data series summarizations and indexes. In our experimental evaluation, with carefully controlled query workloads, we shed light on key factors affecting the performance of nearest neighbor search in large data series collections. 
This is the first paper that introduces a method for quantifying hardness of data series queries, as well as the ability to generate queries of predefined hardness.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{To:2018:SSM, author = "Quoc-Cuong To and Juan Soto and Volker Markl", title = "A survey of state management in big data processing systems", journal = j-VLDB-J, volume = "27", number = "6", pages = "847--872", month = dec, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0514-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The concept of state and its applications vary widely across big data processing systems. This is evident in both the research literature and existing systems, such as Apache Flink, Apache Heron, Apache Samza, Apache Spark, and Apache Storm. Given the pivotal role that state management plays, particularly, for iterative batch and stream processing, in this survey, we present examples of state as an enabler, discuss the alternative approaches used to handle and implement state, capture the many facets of state management, and highlight new research directions. Our aim is to provide insight into disparate state management techniques, motivate others to pursue research in this area, and draw attention to open problems.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Liu:2018:ACE, author = "Yuchen Liu and Hai Liu and Dongqing Xiao and Mohamed Y. 
Eltabakh", title = "Adaptive correlation exploitation in big data query optimization", journal = j-VLDB-J, volume = "27", number = "6", pages = "873--898", month = dec, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0515-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Correlations among the data attributes are abundant and inherent in most application domains. These correlations, if managed in systematic and efficient ways, would enable various optimization opportunities. Unfortunately, the state-of-art techniques are all heavily tailored toward optimizing factors intrinsic to relational databases, e.g., predicate selectivity, random I/O accesses, and secondary indexes, which are mostly not applicable to the modern big data infrastructures, e.g., Hadoop and Spark. In this paper, we propose the EXORD$^{++}$ system for exploiting the data's correlations in big data query optimization. EXORD$^{++}$ supports two types of correlations; hard (which does not allow for exceptions) and soft (which allows for exceptions). We introduce a three-phase approach for managing soft correlations including: (1) validating and judging the worthiness of soft correlations, (2) selecting and preparing the soft correlations for deployment, and (3) exploiting the correlations in query optimization. EXORD$^{++}$ introduces a novel cost-benefit model for adaptively selecting the most beneficial soft correlations given a query workload. We show the complexity of this problem (NP-Hard) and propose a heuristic to efficiently solve it in a polynomial time. Moreover, we present incremental maintenance algorithms for efficiently updating the system's state under data appends and workload changes. EXORD$^{++}$ prototype is implemented as an extension to the Hive engine on top of Hadoop.
The experimental evaluation shows the potential of EXORD$^{++}$ in achieving more than 10x speedup while introducing minimal storage overheads.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2018:EEM, author = "Tianzheng Wang and Ryan Johnson and Alan Fekete and Ippokratis Pandis", title = "Erratum to: {Efficiently making (almost) any concurrency control mechanism serializable}", journal = j-VLDB-J, volume = "27", number = "6", pages = "899--900", month = dec, year = "2018", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-017-0471-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See \cite{Wang:2017:EMA}.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Rahman:2019:OGF, author = "Habibur Rahman and Senjuti Basu Roy and Saravanan Thirumuruganathan and Sihem Amer-Yahia and Gautam Das", title = "Optimized group formation for solving collaborative tasks", journal = j-VLDB-J, volume = "28", number = "1", pages = "1--23", month = feb, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0516-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Many popular applications, such as collaborative document editing, sentence translation, or citizen science, resort to collaborative crowdsourcing, a special form of human-based computing, where, crowd workers with appropriate skills and expertise are required to form groups to solve complex tasks.
While there has been extensive research on workers' task assignment for traditional microtask-based crowdsourcing, they often ignore the critical aspect of collaboration. Central to any collaborative crowdsourcing process is the aspect of solving collaborative tasks that requires successful collaboration among the workers. Our formalism considers two main collaboration-related factors---affinity and upper critical mass---appropriately adapted from organizational science and social theories. Our contributions are threefold. First, we formalize the notion of collaboration among crowd workers and propose a comprehensive optimization model for task assignment in a collaborative crowdsourcing environment. Next, we study the hardness of the task assignment optimization problem and propose a series of efficient exact and approximation algorithms with provable theoretical guarantees. Finally, we present a detailed set of experimental results stemming from two real-world collaborative crowdsourcing application using Amazon Mechanical Turk.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wu:2019:VFS, author = "Zhiqiang Wu and Kenli Li", title = "{VBTree}: forward secure conjunctive queries over encrypted data for cloud computing", journal = j-VLDB-J, volume = "28", number = "1", pages = "25--46", month = feb, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0517-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This paper concerns the fundamental problem of processing conjunctive keyword queries over an outsourced data table on untrusted public clouds in a privacy-preserving manner.
The data table can be properly implemented with tree-based searchable symmetric encryption schemes, such as the known Keyword Red--Black tree and the Indistinguishable Bloom-filter Tree in ICDE'17. However, as for these trees, there still exist many limitations to support sub-linear time updates. One of the reasons is that their tree branches are directly exposed to the cloud. To achieve efficient conjunctive queries while supporting dynamic updates, we introduce a novel tree data structure called virtual binary tree (VBTree). Our key design is to organize indexing elements into the VBTree in a top-down fashion, without storing any tree branches and tree nodes. The tree only exists in a logical view, and all of the elements are actually stored in a hash table. To achieve forward privacy, which is discussed by Bost in CCS'16, we also propose a storage mechanism called version control repository (VCR), to record and control versions of keywords and queries. VCR has a smaller client-side storage compared to other forward-private schemes. With our proposed approach, data elements can be quickly searched while the index can be privately updated. The security of the VBTree is formally proved under the IND-CKA2 model. We test our scheme on a real e-mail dataset and a user location dataset.
The testing results demonstrate its high efficiency and scalability in both searching and updating processes.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lee:2019:PFP, author = "Seokki Lee and Bertram Lud{\"a}scher and Boris Glavic", title = "{PUG}: a framework and practical implementation for why and why-not provenance", journal = j-VLDB-J, volume = "28", number = "1", pages = "47--71", month = feb, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0518-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Explaining why an answer is (or is not) returned by a query is important for many applications including auditing, debugging data and queries, and answering hypothetical questions about data. In this work, we present the first practical approach for answering such questions for queries with negation (first-order queries). Specifically, we introduce a graph-based provenance model that, while syntactic in nature, supports reverse reasoning and is proven to encode a wide range of provenance models from the literature. The implementation of this model in our PUG (Provenance Unification through Graphs) system takes a provenance question and Datalog query as an input and generates a Datalog program that computes an explanation, i.e., the part of the provenance that is relevant to answer the question. Furthermore, we demonstrate how a desirable factorization of provenance can be achieved by rewriting an input query. 
We experimentally evaluate our approach demonstrating its efficiency.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2019:SSS, author = "Wenlu Wang and Ji Zhang and Min-Te Sun and Wei-Shinn Ku", title = "A scalable spatial skyline evaluation system utilizing parallel independent region groups", journal = j-VLDB-J, volume = "28", number = "1", pages = "73--98", month = feb, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0519-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "This research presents two parallel solutions to efficiently address spatial skyline queries. First, we propose a novel concept called independent regions for parallelizing the process of spatial skyline evaluation. Spatial skyline candidates in an independent region do not depend on any data point in other independent regions. Then, we propose a GPU-based solution. We use multi-level independent region group-based parallel filter to support efficient multi-threading spatial skyline non-candidate elimination. Beyond that, we propose comparable region to accelerate non-candidate elimination in each independent region. Secondly, we propose a MapReduce-based solution. We generate the convex hull of query points in the first MapReduce phase. In the second phase, we calculate independent regions based on the input data points and the convex hull of the query points. With the independent regions, spatial skylines are evaluated in parallel in the third phase, in which data points are partitioned by their associated independent regions in map functions, and spatial skyline candidates are calculated by reduce functions. 
The results of the spatial skyline queries are the union of outputs from the reduce functions. Our experimental results show that GPU multi-threading scheme is very efficient on small-scale input datasets. On the contrary, MapReduce scheme performs very well on large-scale input datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2019:APS, author = "Yue Wang and Lei Chen and Yulin Che and Qiong Luo", title = "Accelerating pairwise {SimRank} estimation over static and dynamic graphs", journal = j-VLDB-J, volume = "28", number = "1", pages = "99--122", month = feb, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0521-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Measuring similarities among different vertices is a fundamental problem in graph analysis. Among different similarity measurements, SimRank is one of the most promising and popular. In reality, instead of computing the whole similarity matrix, people often issue SimRank queries in a pairwise manner, each of which needs to estimate an approximate SimRank value within a specified accuracy for a given pair of nodes. These pairwise SimRank queries are often processed on real-life graphs, which typically evolve over time, requiring efficient algorithms that can query pairwise SimRank under dynamic graph updates. However, current single-pair SimRank solutions are either static or inefficient in handling dynamic cases with good-quality results. 
Observing that the sample size is the major factor that determines the efficiency and the accuracy in Monte Carlo methods to estimate pairwise SimRank, in this paper, we propose three algorithms to query pairwise SimRank over static and dynamic graphs efficiently, by using different sample reduction strategies. The accuracy of our algorithms is guaranteed by the different invariants we propose for pairwise SimRank. We show that our algorithms outperform the state-of-the-art static and dynamic solutions for pairwise SimRank estimation.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhao:2019:EMC, author = "Kaiqi Zhao and Gao Cong and Jin-Yao Chin and Rong Wen", title = "Exploring market competition over topics in spatio-temporal document collections", journal = j-VLDB-J, volume = "28", number = "1", pages = "123--145", month = feb, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0522-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Feb 5 08:07:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "With the prominence of location-based services and social networks in recent years, huge amounts of spatio-temporal document collections (e.g., geo-tagged tweets) have been generated. These data collections often imply user's ideas on different products and thus are helpful for business owners to explore hot topics of their brands and the competition relation to other brands in different spatial regions during different periods. In this work, we aim to mine the topics and the market competition of different brands over each topic for a category of business (e.g., coffeehouses) from spatio-temporal documents within a user-specified region and time period. 
To support such spatio-temporal search online in an exploratory manner, we propose a novel framework equipped by (1) a generative model for mining topics and market competition, (2) an Octree-based off-line pre-training method for the model and (3) an efficient algorithm for combining pre-trained models to return the topics and market competition on each topic within a user-specified pair of region and time span. Extensive experiments show that our framework is able to improve the runtime by up to an order of magnitude compared with baselines while achieving similar model quality in terms of training log-likelihood.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Picado:2019:LSE, author = "Jose Picado and Arash Termehchy and Alan Fern and Parisa Ataei", title = "Logical scalability and efficiency of relational learning algorithms", journal = j-VLDB-J, volume = "28", number = "2", pages = "147--171", month = apr, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0523-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon May 20 17:17:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Relational learning algorithms learn the definition of a new relation in terms of existing relations in the database. The same database may be represented under different schemas for various reasons, such as efficiency, data quality, and usability. Unfortunately, the output of current relational learning algorithms tends to vary quite substantially over the choice of schema, both in terms of learning accuracy and efficiency. 
We introduce the property of schema independence of relational learning algorithms, and study both the theoretical and empirical dependence of existing algorithms on the common class of (de)composition schema transformations. We show theoretically and empirically that current relational learning algorithms are generally not schema independent. We propose Castor, a relational learning algorithm that achieves schema independence.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Aluc:2019:BSC, author = "G{\"u}nes Alu{\c{c}} and M. Tamer {\"O}zsu and Khuzaima Daudjee", title = "Building self-clustering {RDF} databases using {Tunable-LSH}", journal = j-VLDB-J, volume = "28", number = "2", pages = "173--195", month = apr, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0530-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon May 20 17:17:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The Resource Description Framework (RDF) is a W3C standard for representing graph-structured data, and SPARQL is the standard query language for RDF. Recent advances in information extraction, linked data management and the Semantic Web have led to a rapid increase in both the volume and the variety of RDF data that are publicly available. As businesses start to capitalize on RDF data, RDF data management systems are being exposed to workloads that are far more diverse and dynamic than what they were designed to handle. Consequently, there is a growing need for developing workload-adaptive and self-tuning RDF data management systems. To realize this objective, we introduce a fast and efficient method for dynamically clustering records in an RDF data management system.
Specifically, we assume nothing about the workload upfront, but as SPARQL queries are executed, we keep track of records that are co-accessed by the queries in the workload and physically cluster them. To decide dynamically and in constant-time where a record needs to be placed in the storage system, we develop a new locality-sensitive hashing (LSH) scheme, Tunable-LSH. Using Tunable-LSH, records that are co-accessed across similar sets of queries can be hashed to the same or nearby physical pages in the storage system. What sets Tunable-LSH apart from existing LSH schemes is that it can auto-tune to achieve the aforementioned clustering objective with high accuracy even when the workloads change. Experimental evaluation of Tunable-LSH in an RDF data management system as well as in a standalone hashtable shows end-to-end performance gains over existing solutions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhou:2019:RTC, author = "Xiangmin Zhou and Dong Qin and Lei Chen and Yanchun Zhang", title = "Real-time context-aware social media recommendation", journal = j-VLDB-J, volume = "28", number = "2", pages = "197--219", month = apr, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0524-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon May 20 17:17:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Social media recommendation has attracted great attention due to its wide applications in online advertisement and entertainment, etc. Since contexts highly affect social user preferences, great effort has been put into context-aware recommendation in recent years. 
However, existing techniques cannot capture the optimal context information that is most discriminative and compact from a large number of available features flexibly for effective and efficient context-aware social recommendation. To address this issue, we propose a generic framework for context-aware recommendation in shared communities, which exploits the characteristics of media content and contexts. Specifically, we first propose a novel approach based on the correlation between a feature and a group of other ones for selecting the optimal features used in recommendation, which fully removes the redundancy. Then, we propose a graph-based model called content--context interaction graph, by analysing the metadata content and social contexts, and the interaction between attributes. Finally, we design hash-based index over Apache Storm for organizing and searching the media database in real time. Extensive experiments have been conducted over large real media collections to prove the high effectiveness and efficiency of our proposed framework.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ntaflos:2019:UAB, author = "Lefteris Ntaflos and George Trimponias and Dimitris Papadias", title = "A unified agent-based framework for constrained graph partitioning", journal = j-VLDB-J, volume = "28", number = "2", pages = "221--241", month = apr, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0526-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon May 20 17:17:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Social networks offer various services such as recommendations of social events, or delivery of targeted advertising material to certain users.
In this work, we focus on a specific type of services modeled as constrained graph partitioning (CGP). CGP assigns users of a social network to a set of classes with bounded capacities so that the similarity and the social costs are minimized. The similarity cost is proportional to the dissimilarity between a user and his class, whereas the social cost is measured in terms of friends that are assigned to different classes. In this work, we investigate two solutions for CGP. The first utilizes a game-theoretic framework, where each user constitutes a player that wishes to minimize his own social and similarity cost. The second employs local search, and aims at minimizing the global cost. We show that the two approaches can be unified under a common agent-based framework that allows for two types of deviations. In a unilateral deviation, an agent switches to a new class, whereas in a bilateral deviation a pair of agents exchange their classes. We develop a number of optimization techniques to improve result quality and facilitate efficiency. 
Our experimental evaluation on real datasets demonstrates that the proposed methods always outperform the state of the art in terms of solution quality, while they are up to an order of magnitude faster.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Omidvar-Tehrani:2019:UGA, author = "Behrooz Omidvar-Tehrani and Sihem Amer-Yahia and Ria Mae Borromeo", title = "User group analytics: hypothesis generation and exploratory analysis of user data", journal = j-VLDB-J, volume = "28", number = "2", pages = "243--266", month = apr, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0527-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon May 20 17:17:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "User data is becoming increasingly available in multiple domains ranging from the social Web to retail store receipts. User data is described by user demographics (e.g., age, gender, occupation) and user actions (e.g., rating a movie, publishing a paper, following a medical treatment). The analysis of user data is appealing to scientists who work on population studies, online marketing, recommendations, and large-scale data analytics. User data analytics usually relies on identifying group-level behavior such as ``Asian women who publish regularly in databases.'' Group analytics addresses peculiarities of user data such as noise and sparsity to enable insights. In this paper, we introduce a framework for user group analytics by developing several components which cover the life cycle of user groups. We provide two different analytical environments to support ``hypothesis generation'' and ``exploratory analysis'' on user groups. 
Experiments on datasets with different characteristics show the usability and efficiency of our group analytics framework.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2019:LSR, author = "Xubo Wang and Lu Qin and Xuemin Lin and Ying Zhang and Lijun Chang", title = "Leveraging set relations in exact and dynamic set similarity join", journal = j-VLDB-J, volume = "28", number = "2", pages = "267--292", month = apr, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0529-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon May 20 17:17:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Set similarity join, which finds all the similar set pairs from two collections of sets, is a fundamental problem with a wide range of applications. Existing works study both exact set similarity join and approximate similarity join problems. In this paper, we focus on the exact set similarity join problem. The existing solutions for exact set similarity join follow a filtering-verification framework, which generates a list of candidate pairs through scanning indexes in the filtering phase and reports those similar pairs in the verification phase. Though much research has been conducted on this problem, set relations have not been well studied on improving the algorithm efficiency through computational cost sharing. Therefore, in this paper, we explore the set relations in different levels to reduce the overall computational cost. First, it has been shown that most of the computational time is spent on the filtering phase, which can be quadratic to the number of sets in the worst case for the existing solutions. Thus, we explore index-level set relations to reduce the filtering cost while keeping the same filtering power. 
We achieve this by grouping related sets into blocks in the index and skipping useless index probes in joins. Second, we explore answer-level set relations to further improve the algorithm based on the intuition that if two sets are similar, their answers may have a large overlap. We derive an algorithm which incrementally generates the answer of one set from an already computed answer of another similar set rather than compute the answer from scratch to reduce the computational cost. In addition, considering that in real applications, the data are usually updated dynamically, we extend our techniques and design efficient algorithms to incrementally update the join result when any element in the sets is updated. Finally, we conduct extensive performance studies using 21 real datasets with various data properties from a wide range of domains. The experimental results demonstrate that our algorithm outperforms all the existing algorithms across all datasets.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Amer-Yahia:2019:TID, author = "Sihem Amer-Yahia and Lei Chen and Ren{\'e}e J. 
Miller", title = "Thematic issue on data management for graphs", journal = j-VLDB-J, volume = "28", number = "3", pages = "293--294", month = jun, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00543-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 2 07:30:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Cebiric:2019:SSG, author = "{\v{S}}ejla Cebiri{\'c} and Fran{\c{c}}ois Goasdou{\'e} and Haridimos Kondylakis and Dimitris Kotzinos and Ioana Manolescu and Georgia Troullinou and Mussab Zneika", title = "Summarizing semantic graphs: a survey", journal = j-VLDB-J, volume = "28", number = "3", pages = "295--327", month = jun, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0528-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 2 07:30:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "The explosion in the amount of the available RDF data has lead to the need to explore, query and understand such data sources. Due to the complex structure of RDF graphs and their heterogeneity, the exploration and understanding tasks are significantly harder than in relational databases, where the schema can serve as a first step toward understanding the structure. Summarization has been applied to RDF data to facilitate these tasks. Its purpose is to extract concise and meaningful information from RDF knowledge bases, representing their content as faithfully as possible. There is no single concept of RDF summary, and not a single but many approaches to build such summaries; each is better suited for some uses, and each presents specific challenges with respect to its construction. 
This survey is the first to provide a comprehensive survey of summarization method for semantic RDF graphs. We propose a taxonomy of existing works in this area, including also some closely related works developed prior to the adoption of RDF in the data management community; we present the concepts at the core of each approach and outline their main technical aspects and implementation. We hope the survey will help readers understand this scientifically rich area and identify the most pertinent summarization method for a variety of usage scenarios.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Demirci:2019:CAP, author = "Gunduz Vehbi Demirci and Hakan Ferhatosmanoglu and Cevdet Aykanat", title = "Cascade-aware partitioning of large graph databases", journal = j-VLDB-J, volume = "28", number = "3", pages = "329--350", month = jun, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0531-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 2 07:30:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Graph partitioning is an essential task for scalable data management and analysis. The current partitioning methods utilize the structure of the graph, and the query log if available. Some queries performed on the database may trigger further operations. For example, the query workload of a social network application may contain re-sharing operations in the form of cascades. It is beneficial to include the potential cascades in the graph partitioning objectives. In this paper, we introduce the problem of cascade-aware graph partitioning that aims to minimize the overall cost of communication among parts/servers during cascade processes. 
We develop a randomized solution that estimates the underlying cascades, and use it as an input for partitioning of large-scale graphs. Experiments on 17 real social networks demonstrate the effectiveness of the proposed solution in terms of the partitioning objectives.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Das:2019:IMM, author = "Apurba Das and Michael Svendsen and Srikanta Tirthapura", title = "Incremental maintenance of maximal cliques in a dynamic graph", journal = j-VLDB-J, volume = "28", number = "3", pages = "351--375", month = jun, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00540-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 2 07:30:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "We consider the maintenance of the set of all maximal cliques in a dynamic graph that is changing through the addition or deletion of edges. We present nearly tight bounds on the magnitude of change in the set of maximal cliques when edges are added to the graph, as well as the first change-sensitive algorithm for incremental clique maintenance under edge additions, whose runtime is proportional to the magnitude of the change in the set of maximal cliques, when the number of edges added is small. Our algorithm can also be applied to the decremental case, when edges are deleted from the graph. 
We present experimental results showing these algorithms are efficient in practice and are faster than prior work by two to three orders of magnitude.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wen:2019:ESG, author = "Dong Wen and Lu Qin and Ying Zhang and Lijun Chang and Xuemin Lin", title = "Efficient structural graph clustering: an index-based approach", journal = j-VLDB-J, volume = "28", number = "3", pages = "377--399", month = jun, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00541-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 2 07:30:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Graph clustering is a fundamental problem widely applied in many applications. The structural graph clustering ($\mathsf{SCAN}$) method obtains not only clusters but also hubs and outliers. However, the clustering results heavily depend on two parameters, $\epsilon$ and $\mu$, while the optimal parameter setting depends on different graph properties and various user requirements. In addition, all existing $\mathsf{SCAN}$ solutions need to scan at least the whole graph, even if only a small number of vertices belong to clusters. In this paper, we propose an index-based method for $\mathsf{SCAN}$. Based on our index, we cluster the graph for any $\epsilon$ and $\mu$ in $O(\sum_{C \in \mathbb{C}} |E_C|)$ time, where $\mathbb{C}$ is the result set of all clusters and $|E_C|$ is the number of edges in a specific cluster $C$. In other words, the time spent on computing structural clustering depends only on the result size, not on the size of the original graph. Our index's space complexity is $O(m)$, where $m$ is the number of edges in the graph.
To handle dynamic graph updates, we propose algorithms and several optimization techniques for maintaining our index. We also design an index for I/O efficient query processing. We conduct extensive experiments to evaluate the performance of all our proposed algorithms on 10 real-world networks, with the largest one containing more than 1 billion edges. The experimental results demonstrate that our approaches significantly outperform existing solutions.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yu:2019:SES, author = "Weiren Yu and Xuemin Lin and Wenjie Zhang and Jian Pei and Julie A. McCann", title = "{SimRank*}: effective and scalable pairwise similarity search based on graph topology", journal = j-VLDB-J, volume = "28", number = "3", pages = "401--426", month = jun, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0536-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Oct 2 07:30:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", abstract = "Given a graph, how can we quantify similarity between two nodes in an effective and scalable way? SimRank is an attractive measure of pairwise similarity based on graph topologies. Its underpinning philosophy that ``two nodes are similar if they are pointed to (have incoming edges) from similar nodes'' can be regarded as an aggregation of similarities based on incoming paths. Despite its popularity in various applications (e.g., web search and social networks), SimRank has an undesirable trait, i.e., ``zero-similarity'': it accommodates only the paths of equal length from a common ``center'' node, whereas a large portion of other paths are fully ignored. In this paper, we propose an effective and scalable similarity model, SimRank*, to remedy this problem.
(1) We first provide a sufficient and necessary condition of the ``zero-similarity'' problem that exists in Jeh and Widom's SimRank model, Li et al.'s SimRank model, Random Walk with Restart (RWR), and ASCOS++. (2) We next present our treatment, SimRank*, which can resolve this issue while inheriting the merit of the simple SimRank philosophy. (3) We reduce the series form of SimRank* to a closed form, which looks simpler than SimRank but which enriches semantics without suffering from increased computational overhead. This leads to an iterative form of SimRank*, which requires $O(Knm)$ time and $O(n^2)$ memory for computing all $(n^2)$ pairs of similarities on a graph of $n$ nodes and $m$ edges for $K$ iterations. (4) To improve the computational time of SimRank* further, we leverage a novel clustering strategy via edge concentration. Due to its NP-hardness, we devise an efficient heuristic to speed up all-pairs SimRank* computation to $O(Kn{\tilde{m}})$ time, where ${\tilde{m}}$ is generally much smaller than $m$. (5) To scale SimRank* on billion-edge graphs, we propose two memory-efficient single-source algorithms, i.e., ss-gSR* for geometric SimRank*, and ss-eSR* for exponential SimRank*, which can retrieve similarities between all $n$ nodes and a given query on an as-needed basis. This significantly reduces the $O(n^2)$ memory of all-pairs search to either $O(Kn + {\tilde{m}})$ for geometric SimRank*, or $O(n + {\tilde{m}})$ for exponential SimRank*, without any loss of accuracy, where ${\tilde{m}} \ll n^2$. (6) We also compare SimRank* with another remedy of SimRank that adds self-loops on each node and demonstrate that SimRank* is more effective.
(7) Using real and synthetic datasets, we empirically verify the richer semantics of SimRank*, and validate its high computational efficiency and scalability on large graphs with billions of edges.", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Valdes:2019:FEM, author = "Fabio Vald{\'e}s and Ralf Hartmut G{\"u}ting", title = "A framework for efficient multi-attribute movement data analysis", journal = j-VLDB-J, volume = "28", number = "4", pages = "427--449", month = aug, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0525-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-018-0525-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Psaropoulos:2019:ICS, author = "Georgios Psaropoulos and Thomas Legler and Norman May and Anastasia Ailamaki", title = "Interleaving with coroutines: a systematic and practical approach to hide memory latency in index joins", journal = j-VLDB-J, volume = "28", number = "4", pages = "451--471", month = aug, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0533-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-018-0533-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } 
@Article{Bronselaer:2019:CRT, author = "Antoon Bronselaer and Christophe Billiet and Robin {De Mol} and Joachim Nielandt and Guy {De Tr{\'e}}", title = "Compact representations of temporal databases", journal = j-VLDB-J, volume = "28", number = "4", pages = "473--496", month = aug, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0535-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-018-0535-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Baazizi:2019:PSI, author = "Mohamed-Amine Baazizi and Dario Colazzo and Giorgio Ghelli and Carlo Sartiani", title = "Parametric schema inference for massive {JSON} datasets", journal = j-VLDB-J, volume = "28", number = "4", pages = "497--521", month = aug, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0532-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-018-0532-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2019:SCG, author = "Yuan Li and Ahmed Eldawy and Jie Xue and Nadezda Knorozova and Mohamed F. 
Mokbel and Ravi Janardan", title = "Scalable computational geometry in {MapReduce}", journal = j-VLDB-J, volume = "28", number = "4", pages = "523--548", month = aug, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-018-0534-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-018-0534-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Park:2019:FSM, author = "Namyong Park and Sejoon Oh and U Kang", title = "Fast and scalable method for distributed {Boolean} tensor factorization", journal = j-VLDB-J, volume = "28", number = "4", pages = "549--574", month = aug, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00538-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00538-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Frazzetto:2019:PAS, author = "Davide Frazzetto and Thomas Dyhre Nielsen and Torben Bach Pedersen and Laurynas {\v{S}}ik{\v{s}}nys", title = "Prescriptive analytics: a survey of emerging trends and technologies", journal = j-VLDB-J, volume = "28", number = "4", pages = "575--595", month = aug, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00539-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00539-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhu:2019:FDC, author = "Rong Zhu and Zhaonian Zou and Jianzhong Li", title = "Fast diversified coherent core search on multi-layer graphs", journal = j-VLDB-J, volume = "28", number = "4", pages = "597--622", month = aug, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00542-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00542-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{AlHasanHaldar:2019:LPL, author = "Nur {Al Hasan Haldar} and Jianxin Li and Mark Reynolds and Timos Sellis and Jeffrey Xu Yu", title = "Location prediction in large-scale social networks: an in-depth benchmarking study", journal = j-VLDB-J, volume = "28", number = "5", pages = "623--648", month = oct, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00553-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00553-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Fang:2019:EPM, author = "Yixiang Fang and Yun Li and Reynold Cheng and Nikos Mamoulis and Gao Cong", 
title = "Evaluating pattern matching queries for spatial databases", journal = j-VLDB-J, volume = "28", number = "5", pages = "649--673", month = oct, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00550-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00550-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kim:2019:ETD, author = "Jinhyun Kim and Jun-Ki Min and Kyuseok Shim", title = "Efficient two-dimensional {Haar$^+$} synopsis construction for the maximum absolute error measure", journal = j-VLDB-J, volume = "28", number = "5", pages = "675--701", month = oct, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00551-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00551-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Theocharidis:2019:SEM, author = "Konstantinos Theocharidis and John Liagouris and Nikos Mamoulis and Panagiotis Bouros and Manolis Terrovitis", title = "{SRX}: efficient management of spatial {RDF} data", journal = j-VLDB-J, volume = "28", number = "5", pages = "703--733", month = oct, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00554-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00554-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yang:2019:FAS, author = "Tong Yang and Jie Jiang and Yang Zhou and Long He and Jinyang Li and Bin Cui and Steve Uhlig and Xiaoming Li", title = "Fast and accurate stream processing by filtering the cold", journal = j-VLDB-J, volume = "28", number = "5", pages = "735--763", month = oct, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00560-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00560-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2019:ESW, author = "Wentao Li and Miao Qiao and Lu Qin and Ying Zhang and Lijun Chang and Xuemin Lin", title = "Eccentricities on small-world networks", journal = j-VLDB-J, volume = "28", number = "5", pages = "765--792", month = oct, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00566-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00566-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Affolter:2019:CSR, author = "Katrin Affolter and Kurt Stockinger and Abraham Bernstein", title = "A 
comparative survey of recent natural language interfaces for databases", journal = j-VLDB-J, volume = "28", number = "5", pages = "793--819", month = oct, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00567-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00567-8; http://link.springer.com/content/pdf/10.1007/s00778-019-00567-8.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Cheng:2019:PCD, author = "Zhinan Cheng and Qun Huang and Patrick P. C. Lee", title = "On the performance and convergence of distributed stream processing via approximate fault tolerance", journal = j-VLDB-J, volume = "28", number = "5", pages = "821--846", month = oct, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00565-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00565-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kondylakis:2019:CSS, author = "Haridimos Kondylakis and Niv Dayan and Kostas Zoumpatianos and Themis Palpanas", title = "{Coconut}: sortable summarizations for scalable indexes over static and streaming data series", journal = j-VLDB-J, volume = "28", number = "6", pages = "847--869", month = dec, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00573-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = 
"1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00573-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2019:EDR, author = "Tianming Zhang and Yunjun Gao and Lu Chen and Wei Guo and Shiliang Pu and Baihua Zheng and Christian S. Jensen", title = "Efficient distributed reachability querying of massive temporal graphs", journal = j-VLDB-J, volume = "28", number = "6", pages = "871--896", month = dec, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00572-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00572-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lin:2019:OPT, author = "Xuelian Lin and Jiahao Jiang and Shuai Ma and Yimeng Zuo and Chunming Hu", title = "One-pass trajectory simplification using the synchronous {Euclidean} distance", journal = j-VLDB-J, volume = "28", number = "6", pages = "897--921", month = dec, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00575-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00575-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", 
} @Article{Wang:2019:PAS, author = "Runhui Wang and Sibo Wang and Xiaofang Zhou", title = "Parallelizing approximate single-source personalized {PageRank} queries on shared memory", journal = j-VLDB-J, volume = "28", number = "6", pages = "923--940", month = dec, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00576-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00576-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Langdale:2019:PGJ, author = "Geoff Langdale and Daniel Lemire", title = "Parsing gigabytes of {JSON} per second", journal = j-VLDB-J, volume = "28", number = "6", pages = "941--960", month = dec, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00578-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00578-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ren:2019:SQI, author = "Weilong Ren and Xiang Lian and Kambiz Ghazinour", title = "Skyline queries over incomplete data streams", journal = j-VLDB-J, volume = "28", number = "6", pages = "961--985", month = dec, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00577-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00577-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2019:ECD, author = "Fan Zhang and Xuemin Lin and Ying Zhang and Lu Qin and Wenjie Zhang", title = "Efficient community discovery with user engagement and similarity", journal = j-VLDB-J, volume = "28", number = "6", pages = "987--1012", month = dec, year = "2019", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00579-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00579-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chen:2020:VSS, author = "Lei Chen and Sihem Amer-Yahia", title = "{VLDB SI} survey editorial", journal = j-VLDB-J, volume = "29", number = "1", pages = "1--2", month = jan, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00598-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:22 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00598-1; http://link.springer.com/content/pdf/10.1007/s00778-019-00598-1.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Su:2020:STD, author = "Han Su and Shuncheng Liu and Bolong Zheng and Xiaofang Zhou and Kai Zheng", title = "A survey of trajectory 
distance measures and performance evaluation", journal = j-VLDB-J, volume = "29", number = "1", pages = "3--32", month = jan, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00574-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:22 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00574-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Fang:2020:MDA, author = "Jian Fang and Yvo T. B. Mulder and Jan Hidders and Jinho Lee and H. Peter Hofstee", title = "In-memory database acceleration on {FPGAs}: a survey", journal = j-VLDB-J, volume = "29", number = "1", pages = "33--59", month = jan, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00581-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:22 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00581-w; http://link.springer.com/content/pdf/10.1007/s00778-019-00581-w.pdf", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Malliaros:2020:CDN, author = "Fragkiskos D. Malliaros and Christos Giatsidis and Apostolos N. 
Papadopoulos and Michalis Vazirgiannis", title = "The core decomposition of networks: theory, algorithms and applications", journal = j-VLDB-J, volume = "29", number = "1", pages = "61--92", month = jan, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00587-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:22 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00587-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Qin:2020:MDV, author = "Xuedi Qin and Yuyu Luo and Nan Tang and Guoliang Li", title = "Making data visualization more efficient and effective: a survey", journal = j-VLDB-J, volume = "29", number = "1", pages = "93--117", month = jan, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00588-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:22 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00588-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Rahman:2020:EID, author = "Protiva Rahman and Lilong Jiang and Arnab Nandi", title = "Evaluating interactive data systems", journal = j-VLDB-J, volume = "29", number = "1", pages = "119--146", month = jan, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00589-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:22 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = 
"http://link.springer.com/article/10.1007/s00778-019-00589-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xie:2020:ESR, author = "Min Xie and Raymond Chi-Wing Wong and Ashwin Lall", title = "An experimental survey of regret minimization query and variants: bridging the best worlds between top-$k$ query and skyline query", journal = j-VLDB-J, volume = "29", number = "1", pages = "147--175", month = jan, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00570-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:22 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00570-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Magdy:2020:MDM, author = "Amr Magdy and Laila Abdelhafeez and Yunfan Kang and Eric Ong and Mohamed F. 
Mokbel",
  title =        "Microblogs data management: a survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "177--216",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00569-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00569-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Volume 29, number 1 (January 2020), continued.

@Article{Tong:2020:SCS,
  author =       "Yongxin Tong and Zimu Zhou and Yuxiang Zeng and Lei Chen and Cyrus Shahabi",
  title =        "Spatial crowdsourcing: a survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "217--250",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00568-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00568-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Chapman:2020:DSS,
  author =       "Adriane Chapman and Elena Simperl and Laura Koesten and George Konstantinidis and Luis-Daniel Ib{\'a}{\~n}ez and Emilia Kacprzak and Paul Groth",
  title =        "Dataset search: a survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "251--272",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00564-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00564-x; http://link.springer.com/content/pdf/10.1007/s00778-019-00564-x.pdf",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Fevgas:2020:IFS,
  author =       "Athanasios Fevgas and Leonidas Akritidis and Panayiotis Bozanis and Yannis Manolopoulos",
  title =        "Indexing in flash storage devices: a survey on challenges, current approaches, and future trends",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "273--311",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00559-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00559-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Giatrakos:2020:CER,
  author =       "Nikos Giatrakos and Elias Alevizos and Alexander Artikis and Antonios Deligiannakis and Minos Garofalakis",
  title =        "Complex event recognition in the {Big Data} era: a survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "313--352",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00557-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00557-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Fang:2020:SCS,
author = "Yixiang Fang and Xin Huang and Lu Qin and Ying Zhang and Wenjie Zhang and Reynold Cheng and Xuemin Lin", title = "A survey of community search over big graphs", journal = j-VLDB-J, volume = "29", number = "1", pages = "353--392", month = jan, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00556-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:22 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See correction \cite{Fang:2020:CSC}.", URL = "http://link.springer.com/article/10.1007/s00778-019-00556-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Luo:2020:LBS, author = "Chen Luo and Michael J. Carey", title = "{LSM}-based storage techniques: a survey", journal = j-VLDB-J, volume = "29", number = "1", pages = "393--418", month = jan, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-019-00555-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Mar 19 17:10:22 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "http://link.springer.com/article/10.1007/s00778-019-00555-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Blumenthal:2020:CHG, author = "David B. 
Blumenthal and Nicolas Boria and Johann Gamper and S{\'e}bastien Bougleux and Luc Brun",
  title =        "Comparing heuristics for graph edit distance computation",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "419--458",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00544-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00544-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Volume 29, number 1 (January 2020), continued.

@Article{Chen:2020:EMM,
  author =       "Xinhong Chen and Qing Li",
  title =        "Event modeling and mining: a long journey toward explainable events",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "459--482",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00545-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00545-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Boncz:2020:SIB,
  author =       "Peter Boncz and Kenneth Salem",
  title =        "Special issue on best papers of {VLDB 2017}",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "483--484",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00600-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00600-w; http://link.springer.com/content/pdf/10.1007/s00778-019-00600-w.pdf",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Deutch:2020:ENL,
  author =       "Daniel Deutch and Nave Frost and Amir Gilad",
  title =        "Explaining Natural Language query results",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "485--508",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00584-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00584-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Huang:2020:VOD,
  author =       "Silu Huang and Liqi Xu and Jialin Liu and Aaron J.
Elmore and Aditya Parameswaran",
  title =        "{{\sc Orpheus}DB}: bolt-on versioning for relational databases (extended version)",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "509--538",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00594-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00594-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% The Huang:2020:VOD title above sets Orpheus in text-mode small caps.
%%% The publisher's math-mode \varvec markup is not standard LaTeX, and
%%% \sc is not valid inside math mode, so that form is not used here.

@Article{Orr:2020:EPA,
  author =       "Laurel Orr and Magdalena Balazinska and Dan Suciu",
  title =        "{EntropyDB}: a probabilistic approach to approximate query processing",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "539--567",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00582-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00582-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Volume 29, combined number 2--3 (May 2020).

@Article{Amer-Yahia:2020:VSE,
  author =       "Sihem Amer-Yahia and Jian Pei",
  title =        "{VLDB SI 2018} editorial",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "593--594",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00599-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00599-0; http://link.springer.com/content/pdf/10.1007/s00778-019-00599-0.pdf",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Sahu:2020:ULG,
  author =       "Siddhartha Sahu and Amine Mhedhbi and Semih Salihoglu and Jimmy Lin and M. Tamer {\"O}zsu",
  title =        "The ubiquity of large graphs and surprising challenges of graph processing: extended survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "595--618",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00548-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00548-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Idris:2020:GDY,
  author =       "Muhammad Idris and Mart{\'{\i}}n Ugarte and Stijn Vansummeren and Hannes Voigt and Wolfgang Lehner",
  title =        "General dynamic {Yannakakis}: conjunctive queries with theta joins under updates",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "619--653",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00590-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00590-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}
%%% Volume 29, combined number 2--3 (May 2020), continued.

@Article{Bonifati:2020:ASL,
  author =       "Angela Bonifati and Wim Martens and Thomas Timm",
  title =        "An analytical study of large {SPARQL} query logs",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "655--679",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00558-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00558-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Asudeh:2020:SAS,
  author =       "Abolfazl Asudeh and Jees Augustine and Azade Nazi and Saravanan Thirumuruganathan and Nan Zhang and Gautam Das and Divesh Srivastava",
  title =        "Scalable algorithms for signal reconstruction by leveraging similarity joins",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "681--707",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00562-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00562-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Ratner:2020:SRT,
  author =       "Alexander Ratner and Stephen H.
Bach and Henry Ehrenberg and Jason Fries and Sen Wu and Christopher R{\'e}",
  title =        "{Snorkel}: rapid training data creation with weak supervision",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "709--730",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00552-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00552-1; http://link.springer.com/content/pdf/10.1007/s00778-019-00552-1.pdf",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Volume 29, combined number 2--3 (May 2020), continued.

@Article{Breslow:2020:MFF,
  author =       "Alex D. Breslow and Nuwan S. Jayasena",
  title =        "{Morton} filters: fast, compressed sparse cuckoo filters",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "731--754",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00561-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00561-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Salem:2020:SIB,
  author =       "Kenneth Salem",
  title =        "Special issue on best papers of {DaMoN 2018}",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "755--755",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00597-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00597-2; http://link.springer.com/content/pdf/10.1007/s00778-019-00597-2.pdf",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Lang:2020:MMY,
  author =       "Harald Lang and Linnea Passing and Andreas Kipf and Peter Boncz and Thomas Neumann and Alfons Kemper",
  title =        "Make the most out of your {SIMD} investments: counter control flow divergence in compiled query pipelines",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "757--774",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00547-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00547-y; http://link.springer.com/content/pdf/10.1007/s00778-019-00547-y.pdf",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zarubin:2020:ECN,
  author =       "Mikhail Zarubin and Thomas Kissinger and Dirk Habich and Thomas Willhalm and Wolfgang Lehner",
  title =        "Efficient compute node-local replication mechanisms for {NVRAM}-centric data structures",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "775--795",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00549-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00549-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB
J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Last entry of volume 29, combined number 2--3 (May 2020).

@Article{Pohl:2020:JHB,
  author =       "Constantin Pohl and Kai-Uwe Sattler and Goetz Graefe",
  title =        "Joins on high-bandwidth memory: a new level in the memory hierarchy",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "797--817",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00546-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00546-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Pedersen:2020:FSR,
  author =       "Simon Aagaard Pedersen and Bin Yang and Christian S.
Jensen",
  title =        "Fast stochastic routing under time-varying uncertainty",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "4",
  pages =        "819--839",
  month =        jul,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00585-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00585-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 31 October 2019 Pages: 819 - 839",
}

%%% Volume 29, number 4 (July 2020).
%%% NOTE(review): each entry below lists exactly three authors; the lists
%%% may be abbreviated (first two + last) --- verify against the publisher.

@Article{Xu:2020:EPM,
  author =       "Hongfei Xu and Yu Gu and Rui Zhang",
  title =        "Efficient processing of moving collective spatial keyword queries",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "4",
  pages =        "841--865",
  month =        jul,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00583-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00583-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 01 November 2019 Pages: 841 - 865",
}

@Article{Geerts:2020:CDL,
  author =       "Floris Geerts and Giansalvatore Mecca and Donatello Santoro",
  title =        "Cleaning data with {Llunatic}",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "4",
  pages =        "867--892",
  month =        jul,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00586-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00586-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 08 November 2019 Pages: 867 - 892",
}

@Article{Wu:2020:TRS,
  author =       "Dingming Wu and Hao Zhou and Nikos Mamoulis",
  title =        "Top-$k$ relevant semantic place retrieval on spatiotemporal {RDF} data",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "4",
  pages =        "893--917",
  month =        jul,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00591-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00591-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 19 November 2019 Pages: 893 - 917",
}

@Article{Qin:2020:EQA,
  author =       "Jianbin Qin and Chuan Xiao and Kunihiko Sadakane",
  title =        "Efficient query autocompletion with edit distance-based error tolerance",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "4",
  pages =        "919--943",
  month =        jul,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00595-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00595-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 14 December
2019 Pages: 919 - 943",
}

%%% Volume 29, number 5 (September 2020).
%%% NOTE(review): the multi-author entries below all list exactly three
%%% authors; the lists may be abbreviated --- verify against the publisher.

@Article{Jiang:2020:SCS,
  author =       "Jiawei Jiang and Fangcheng Fu and Bin Cui",
  title =        "{SKCompress}: compressing sparse and nonuniform gradient in distributed machine learning",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "945--972",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00596-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00596-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 01 January 2020 Pages: 945 - 972",
}

@Article{Wang:2020:FEF,
  author =       "Chaohui Wang and Miao Xie and Shuigeng Zhou",
  title =        "{FERRARI}: an efficient framework for visual exploratory subgraph search in graph databases",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "973--998",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00601-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00601-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 30 January 2020 Pages: 973 - 998",
}

@Article{Chang:2020:EMC,
  author =       "Lijun Chang",
  title =        "Efficient maximum clique computation and enumeration over large sparse graphs",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "999--1022",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00602-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00602-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 15 February 2020 Pages: 999 - 1022",
}

@Article{Chondrogiannis:2020:FSP,
  author =       "Theodoros Chondrogiannis and Panagiotis Bouros and David B. Blumenthal",
  title =        "Finding $k$-shortest paths with limited overlap",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1023--1047",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00604-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00604-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 21 February 2020 Pages: 1023 - 1047",
}

@Article{Zou:2020:ADS,
  author =       "Jia Zou and Arun Iyengar and Chris Jermaine",
  title =        "Architecture of a distributed storage that combines file system, memory and computation in a single layer",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1049--1073",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00605-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00605-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 26 February 2020 Pages: 1049 - 1073",
}

@Article{Liu:2020:ECC,
  author =       "Boge Liu and Long Yuan and Jingren Zhou",
  title =        "Efficient $ (\alpha, \beta)$-core computation in bipartite graphs",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1075--1099",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00606-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00606-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 04 March 2020 Pages: 1075 - 1099",
}

@Article{Chen:2020:TTP,
  author =       "Lisi Chen and Shuo Shang and Ling Shao",
  title =        "Top-$k$ term publish\slash subscribe for geo-textual data streams",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1101--1128",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00607-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00607-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 09 March 2020 Pages: 1101 - 1128",
}

@Article{Yang:2020:TFA,
  author =       "Fan Yang and
Faisal M. Almutairi and Vladimir Zadorozhny",
  title =        "{TurboLift}: fast accuracy lifting for historical data recovery",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1129--1148",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00609-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See correction \cite{Yang:2024:CTF}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00609-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 09 March 2020 Pages: 1129 - 1148",
}

%%% Volume 29, number 5 (September 2020), continued.
%%% NOTE(review): entries here list exactly three authors; the lists may
%%% be abbreviated (first two + last) --- verify against the publisher.

@Article{Guo:2020:CAP,
  author =       "Chenjuan Guo and Bin Yang and Lu Chen",
  title =        "Context-aware, preference-based vehicle routing",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1149--1170",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00608-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00608-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 11 March 2020 Pages: 1149 - 1170",
}

@Article{Cai:2020:DSK,
  author =       "Zhi Cai and Georgios Kalamatianos and Dimitris Papadias",
  title =        "Diversified spatial keyword search on {RDF} data",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1171--1189",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00610-z",
  ISSN =         "1066-8888 (print), 0949-877X
(electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00610-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 12 March 2020 Pages: 1171 - 1189",
}

@Article{Goasdoue:2020:RGS,
  author =       "Fran{\c{c}}ois Goasdou{\'e} and Pawe{\l} Guzewicz and Ioana Manolescu",
  title =        "{RDF} graph summarization for first-sight structure discovery",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1191--1218",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00611-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00611-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 30 April 2020 Pages: 1191 - 1218",
}

%%% Correction notice for Fang:2020:SCS.  The author list is kept identical
%%% to that entry (seven authors) rather than abbreviated to first two +
%%% last.  NOTE(review): confirm against the publisher's correction page.

@Article{Fang:2020:CSC,
  author =       "Yixiang Fang and Xin Huang and Lu Qin and Ying Zhang and Wenjie Zhang and Reynold Cheng and Xuemin Lin",
  title =        "Correction: {A survey of community search over big graphs}",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1219--1219",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00592-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See \cite{Fang:2020:SCS}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00592-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large
Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 11 November 2019 Pages: 1219 - 1219",
}

%%% Volume 29, number 6 (November 2020).
%%% NOTE(review): the three-author list of vanRenen:2020:BBP may be
%%% abbreviated (first two + last) --- verify against the publisher.

@Article{Aboulnaga:2020:SIB,
  author =       "Ashraf Aboulnaga",
  title =        "Special issue on the best papers of {DaMoN 2019}",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1221--1221",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00629-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00629-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 05 September 2020 Pages: 1221 - 1221",
}

@Article{vanRenen:2020:BBP,
  author =       "Alexander van Renen and Lukas Vogel and Alfons Kemper",
  title =        "Building blocks for persistent memory",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1223--1241",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00622-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00622-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date =  "Published: 23 September 2020 Pages: 1223 - 1241",
}

@Article{Polychroniou:2020:VSV,
  author =       "Orestis Polychroniou and Kenneth A.
Ross", title = "{VIP}: a {SIMD} vectorized analytical query engine", journal = j-VLDB-J, volume = "29", number = "6", pages = "1243--1261", month = nov, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00621-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00621-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 13 July 2020 Pages: 1243 - 1261", } @Article{Lasch:2020:FSS, author = "Robert Lasch and Ismail Oukid and Kai-Uwe Sattler", title = "Faster \& strong: string dictionary compression using sampling and fast vectorized decompression", journal = j-VLDB-J, volume = "29", number = "6", pages = "1263--1285", month = nov, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00620-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00620-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 20 July 2020 Pages: 1263 - 1285", } @Article{Kruse:2020:RDJ, author = "Sebastian Kruse and Zoi Kaoudi and Jorge-Arnulfo Quian{\'e}-Ruiz", title = "{RHEEMix} in the data jungle: a cost-based optimizer for cross-platform systems", journal = j-VLDB-J, volume = "29", number = "6", pages = "1287--1310", month = nov, year = "2020", CODEN = "VLDBFR", DOI = 
"https://doi.org/10.1007/s00778-020-00612-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00612-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 18 May 2020 Pages: 1287 - 1310", } @Article{Yang:2020:GBF, author = "Jingru Yang and Ju Fan and Xiaoyong Du", title = "A game-based framework for crowdsourced data labeling", journal = j-VLDB-J, volume = "29", number = "6", pages = "1311--1336", month = nov, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00613-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00613-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 19 May 2020 Pages: 1311 - 1336", } @Article{Jacobs:2020:BBB, author = "Steven Jacobs and Xikui Wang and Md Yusuf Sarwar Uddin", title = "{BAD} to the bone: {Big Active Data} at its core", journal = j-VLDB-J, volume = "29", number = "6", pages = "1337--1364", month = nov, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00616-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00616-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very 
Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 23 May 2020 Pages: 1337 - 1364", } @Article{Sun:2020:TSI, author = "Tao Sun and Hongbo Liu and Xindong Wu", title = "Time series indexing by dynamic covering with cross-range constraints", journal = j-VLDB-J, volume = "29", number = "6", pages = "1365--1384", month = nov, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00614-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00614-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 28 May 2020 Pages: 1365 - 1384", } @Article{Huang:2020:EAA, author = "Keke Huang and Jing Tang and Andrew Lim", title = "Efficient approximation algorithms for adaptive influence maximization", journal = j-VLDB-J, volume = "29", number = "6", pages = "1385--1406", month = nov, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00615-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00615-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 01 June 2020 Pages: 1385 - 1406", } @Article{Li:2020:FSC, author = "Rong-Hua Li and Lu Qin and Zibin Zheng", title = "Finding skyline communities in multi-valued networks", journal = j-VLDB-J, volume = "29", number = "6", pages = 
"1407--1432", month = nov, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00618-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00618-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 08 June 2020 Pages: 1407 - 1432", } @Article{Ahmad:2020:AWM, author = "Hiba Abu Ahmad and Hongzhi Wang", title = "Automatic weighted matching rectifying rule discovery for data repairing", journal = j-VLDB-J, volume = "29", number = "6", pages = "1433--1447", month = nov, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00617-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00617-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 09 June 2020 Pages: 1433 - 1447", } @Article{Linardi:2020:SDS, author = "Michele Linardi and Themis Palpanas", title = "Scalable data series subsequence matching with {ULISSE}", journal = j-VLDB-J, volume = "29", number = "6", pages = "1449--1474", month = nov, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00619-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00619-4", acknowledgement 
= ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 04 July 2020 Pages: 1449 - 1474", } @Article{Song:2020:IPA, author = "Liangjun Song and Junhao Gan and Timos Sellis", title = "Incremental preference adjustment: a graph-theoretical approach", journal = j-VLDB-J, volume = "29", number = "6", pages = "1475--1500", month = nov, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00623-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00623-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 03 August 2020 Pages: 1475 - 1500", } @Article{Lee:2020:TLA, author = "Dongjin Lee and Kijung Shin and Christos Faloutsos", title = "Temporal locality-aware sampling for accurate triangle counting in real graph streams", journal = j-VLDB-J, volume = "29", number = "6", pages = "1501--1525", month = nov, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00624-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00624-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 12 August 2020 Pages: 1501 - 1525", } @Article{Omidvar-Tehrani:2020:CAE, author = "Behrooz Omidvar-Tehrani and Sihem Amer-Yahia and 
Laks V. S. Lakshmanan", title = "Cohort analytics: efficiency and applicability", journal = j-VLDB-J, volume = "29", number = "6", pages = "1527--1550", month = nov, year = "2020", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00625-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00625-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 27 August 2020 Pages: 1527 - 1550", } @Article{Ozcan:2021:GES, author = "Fatma {\"O}zcan and Lei Chen", title = "Guest Editorial: Special issue on {VLDB 2019}", journal = j-VLDB-J, volume = "30", number = "1", pages = "1--2", month = jan, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00630-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00630-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 11 September 2020 Pages: 1 - 2", } @Article{Ruan:2021:LFG, author = "Pingcheng Ruan and Tien Tuan Anh Dinh and Beng Chin Ooi", title = "{LineageChain}: a fine-grained, secure and efficient data provenance system for blockchains", journal = j-VLDB-J, volume = "30", number = "1", pages = "3--24", month = jan, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00646-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 
2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00646-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 01 January 2021 Pages: 3 - 24", } @Article{Wu:2021:ATC, author = "Chenggang Wu and Vikram Sreekanti and Joseph M. Hellerstein", title = "Autoscaling tiered cloud storage in {Anna}", journal = j-VLDB-J, volume = "30", number = "1", pages = "25--43", month = jan, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00632-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00632-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 09 September 2020 Pages: 25 - 43", } @Article{Abuzaid:2021:DRI, author = "Firas Abuzaid and Peter Kraft and Matei Zaharia", title = "{DIFF}: a relational interface for large-scale data explanation", journal = j-VLDB-J, volume = "30", number = "1", pages = "45--70", month = jan, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00633-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00633-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; 
https://link.springer.com/journal/778", online-date = "Published: 30 September 2020 Pages: 45 - 70", } @Article{Whittaker:2021:ICC, author = "Michael Whittaker and Joseph M. Hellerstein", title = "Interactive checks for coordination avoidance", journal = j-VLDB-J, volume = "30", number = "1", pages = "71--92", month = jan, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00628-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00628-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 05 September 2020 Pages: 71 - 92", } @Article{Fan:2021:GBV, author = "Hua Fan and Wojciech Golab", title = "Gossip-based visibility control for high-performance geo-distributed transactions", journal = j-VLDB-J, volume = "30", number = "1", pages = "93--114", month = jan, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00626-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00626-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 21 September 2020 Pages: 93 - 114", } @Article{Li:2021:QSD, author = "Yuliang Li and Aaron Feng and Wang-Chiew Tan", title = "Querying subjective data", journal = j-VLDB-J, volume = "30", number = "1", pages = "115--140", month = jan, year = "2021", CODEN = "VLDBFR", DOI = 
"https://doi.org/10.1007/s00778-020-00634-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00634-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 08 September 2020 Pages: 115 - 140", } @Article{Dong:2021:CTS, author = "Yuyang Dong and Chuan Xiao and Hiroyuki Kitagawa", title = "Continuous top-$k$ spatial-keyword search on dynamic objects", journal = j-VLDB-J, volume = "30", number = "2", pages = "141--161", month = mar, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00627-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00627-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 05 September 2020 Pages: 141 - 161", } @Article{Zhang:2021:TTA, author = "Feng Zhang and Jidong Zhai and Xiaoyong Du", title = "{TADOC}: Text analytics directly on compression", journal = j-VLDB-J, volume = "30", number = "2", pages = "163--188", month = mar, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00636-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00636-3", acknowledgement = 
ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 19 September 2020 Pages: 163 - 188", } @Article{Li:2021:CTQ, author = "Yan Li and Hao Wang and Zhiguo Gong", title = "Crowdsourced top-$k$ queries by pairwise preference judgments with confidence and budget control", journal = j-VLDB-J, volume = "30", number = "2", pages = "189--213", month = mar, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00631-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00631-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 21 September 2020 Pages: 189 - 213", } @Article{Liu:2021:LET, author = "Wanqi Liu and Hanchen Wang and Xuemin Lin", title = "{EI-LSH}: an early-termination driven {I/O} efficient incremental $c$-approximate nearest neighbor search", journal = j-VLDB-J, volume = "30", number = "2", pages = "215--235", month = mar, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00635-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00635-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 30 September 2020 Pages: 215 - 235", } @Article{Yu:2021:GCC, author = "Jia Yu and Mohamed Sarwat", title 
= "{GeoSparkViz}: a cluster computing system for visualizing massive-scale geospatial data", journal = j-VLDB-J, volume = "30", number = "2", pages = "237--258", month = mar, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00645-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00645-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 07 January 2021 Pages: 237 - 258", } @Article{Zhang:2021:SAN, author = "Yongqi Zhang and Quanming Yao and Lei Chen", title = "Simple and automated negative sampling for knowledge graph embedding", journal = j-VLDB-J, volume = "30", number = "2", pages = "259--285", month = mar, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00640-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00640-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 28 January 2021 Pages: 259 - 285", } @Article{Fang:2021:DHE, author = "Ziquan Fang and Lu Chen and Christian S. 
Jensen", title = "{Dragoon}: a hybrid and efficient big trajectory management system for offline and online analytics", journal = j-VLDB-J, volume = "30", number = "2", pages = "287--310", month = mar, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00652-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu May 13 17:41:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00652-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", online-date = "Published: 03 February 2021 Pages: 287 - 310", } @Article{Paul:2021:SER, author = "Debjyoti Paul and Feifei Li and Jeff M. Phillips", title = "Semantic embedding for regions of interest", journal = j-VLDB-J, volume = "30", number = "3", pages = "311--331", month = may, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00647-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 9 10:33:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00647-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Romanous:2021:ELL, author = "Bashar Romanous and Skyler Windh and Vassilis Tsotras", title = "Efficient local locking for massively multithreaded in-memory hash-based operators", journal = j-VLDB-J, volume = "30", number = "3", pages = "333--359", month = may, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00642-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 9 10:33:58 MDT 2022", 
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00642-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mao:2021:CES, author = "Qizhong Mao and Steven Jacobs and Neal E. Young", title = "Comparison and evaluation of state-of-the-art {LSM} merge policies", journal = j-VLDB-J, volume = "30", number = "3", pages = "361--378", month = may, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00638-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 9 10:33:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00638-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Piatov:2021:CES, author = "Danila Piatov and Sven Helmer and Fabio Persia", title = "Cache-efficient sweeping-based interval joins for extended {Allen} relation predicates", journal = j-VLDB-J, volume = "30", number = "3", pages = "379--402", month = may, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00650-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 9 10:33:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00650-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Do:2021:BDC, author = "Jaeyoung Do and Ivan Luiz Picoli and Philippe Bonnet", 
title = "Better database cost\slash performance via batched {I/O} on programmable {SSD}", journal = j-VLDB-J, volume = "30", number = "3", pages = "403--424", month = may, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00648-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 9 10:33:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00648-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Song:2021:CTT, author = "Shaoxu Song and Ruihong Huang and Jianmin Wang", title = "Cleaning timestamps with temporal constraints", journal = j-VLDB-J, volume = "30", number = "3", pages = "425--446", month = may, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00641-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 9 10:33:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00641-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yang:2021:IEM, author = "Chengcheng Yang and Dong Deng and Ling Shao", title = "Internal and external memory set containment join", journal = j-VLDB-J, volume = "30", number = "3", pages = "447--470", month = may, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00644-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 9 10:33:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See correction \cite{Yang:2021:CIE}.", URL = 
"https://link.springer.com/article/10.1007/s00778-020-00644-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chen:2021:ESN, author = "Xiaoshuang Chen and Longbin Lai and Xuemin Lin", title = "Efficient structural node similarity computation on billion-scale graphs", journal = j-VLDB-J, volume = "30", number = "3", pages = "471--493", month = may, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00654-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 9 10:33:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00654-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yu:2021:VAR, author = "Wenhui Yu and Xiangnan He and Zheng Qin", title = "Visually aware recommendation with aesthetic features", journal = j-VLDB-J, volume = "30", number = "4", pages = "495--513", month = jul, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00651-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00651-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hao:2021:MCE, author = "Shuang Hao and Nan Tang and Ning Wang", title = "Mis-categorized entities detection", journal = j-VLDB-J, volume = "30", number = "4", pages = "515--536", month = jul, year = "2021", CODEN = "VLDBFR", DOI = 
"https://doi.org/10.1007/s00778-021-00653-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00653-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Galhotra:2021:EEP, author = "Sainyam Galhotra and Donatella Firmani and Divesh Srivastava", title = "Efficient and effective {ER} with progressive blocking", journal = j-VLDB-J, volume = "30", number = "4", pages = "537--557", month = jul, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00656-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00656-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hewasinghage:2021:CMR, author = "Moditha Hewasinghage and Alberto Abell{\'o} and Esteban Zim{\'a}nyi", title = "A cost model for random access queries in document stores", journal = j-VLDB-J, volume = "30", number = "4", pages = "559--578", month = jul, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00660-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00660-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; 
https://link.springer.com/journal/778", } @Article{Schneider:2021:DDS, author = "Johannes Schneider and Phillip Wenig and Thorsten Papenbrock", title = "Distributed detection of sequential anomalies in univariate time series", journal = j-VLDB-J, volume = "30", number = "4", pages = "579--602", month = jul, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00657-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00657-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chen:2021:LKB, author = "Zhida Chen and Lisi Chen and Christian S. Jensen", title = "Location- and keyword-based querying of geo-textual data: a survey", journal = j-VLDB-J, volume = "30", number = "4", pages = "603--640", month = jul, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00661-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00661-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sirin:2021:MAA, author = "Utku Sirin and Pinar T{\"o}z{\"u}n and Anastasia Ailamaki", title = "Micro-architectural analysis of in-memory {OLTP}: Revisited", journal = j-VLDB-J, volume = "30", number = "4", pages = "641--665", month = jul, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00663-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = 
"1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00663-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bouros:2021:MIJ, author = "Panagiotis Bouros and Nikos Mamoulis and Manolis Terrovitis", title = "In-Memory Interval Joins", journal = j-VLDB-J, volume = "30", number = "4", pages = "667--691", month = jul, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00639-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00639-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Guo:2021:MAD, author = "Yunyan Guo and Zhipeng Zhang and Jianzhong Li", title = "Model averaging in distributed machine learning: a case study with {Apache Spark}", journal = j-VLDB-J, volume = "30", number = "4", pages = "693--712", month = jul, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00664-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00664-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Jiang:2021:EEK, author = "Yuli Jiang and Xin Huang and Hong Cheng", title = "{I/O} efficient $k$-truss 
community search in massive graphs", journal = j-VLDB-J, volume = "30", number = "5", pages = "713--738", month = sep, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-020-00649-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-020-00649-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Balayn:2021:MBU, author = "Agathe Balayn and Christoph Lofi and Geert-Jan Houben", title = "Managing bias and unfairness in data for decision support: a survey of machine learning and data engineering approaches to identify and mitigate bias and unfairness within data management and analytics systems", journal = j-VLDB-J, volume = "30", number = "5", pages = "739--768", month = sep, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00671-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00671-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Shao:2021:MAF, author = "Yingxia Shao and Shiyue Huang and Lei Chen", title = "Memory-aware framework for fast and scalable second-order random walk over billion-edge natural graphs", journal = j-VLDB-J, volume = "30", number = "5", pages = "769--797", month = sep, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00669-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = 
"Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00669-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Peng:2021:EHC, author = "You Peng and Xuemin Lin and Jingren Zhou", title = "Efficient Hop-constrained $s$--$t$ Simple Path Enumeration", journal = j-VLDB-J, volume = "30", number = "5", pages = "799--823", month = sep, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00674-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00674-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Debrouvier:2021:MQL, author = "Ariel Debrouvier and Eliseo Parodi and Alejandro Vaisman", title = "A model and query language for temporal graph databases", journal = j-VLDB-J, volume = "30", number = "5", pages = "825--858", month = sep, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00675-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00675-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2021:FSH, author = "Jin Wang and Jiacheng Wu and Carlo Zaniolo", title = "Formal semantics and high performance in 
declarative machine learning using {Datalog}", journal = j-VLDB-J, volume = "30", number = "5", pages = "859--881", month = sep, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00665-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00665-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", }

%%% Note: the capitalized phrases {Tidy Tuples} and {Flying Start} are
%%% braced so that bibliography styles that downcase titles keep their
%%% capitals, as already done for {Umbra} in the same title.
@Article{Kersten:2021:TTF,
  author =          "Timo Kersten and Viktor Leis and Thomas Neumann",
  title =           "{Tidy Tuples} and {Flying Start}: fast compilation and fast execution of relational queries in {Umbra}",
  journal =         j-VLDB-J,
  volume =          "30",
  number =          "5",
  pages =           "883--905",
  month =           sep,
  year =            "2021",
  CODEN =           "VLDBFR",
  DOI =             "https://doi.org/10.1007/s00778-020-00643-4",
  ISSN =            "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =          "1066-8888",
  bibdate =         "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =       "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =             "https://link.springer.com/article/10.1007/s00778-020-00643-4",
  acknowledgement = ack-nhfb,
  ajournal =        "VLDB J.",
  fjournal =        "VLDB Journal: Very Large Data Bases",
  journal-URL =     "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Yang:2021:CIE, author = "Chengcheng Yang and Dong Deng and Ling Shao", title = "Correction to: {Internal} and external memory set containment join", journal = j-VLDB-J, volume = "30", number = "5", pages = "907--907", month = sep, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00662-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See \cite{Yang:2021:IEM}.", URL = 
"https://link.springer.com/article/10.1007/s00778-021-00662-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Boniol:2021:USS, author = "Paul Boniol and Michele Linardi and Emmanuel Remy", title = "Unsupervised and scalable subsequence anomaly detection in large data series", journal = j-VLDB-J, volume = "30", number = "6", pages = "909--931", month = nov, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00655-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See correction \cite{Boniol:2023:CUS}.", URL = "https://link.springer.com/article/10.1007/s00778-021-00655-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tangwongsan:2021:OSW, author = "Kanat Tangwongsan and Martin Hirzel and Scott Schneider", title = "In-order sliding-window aggregation in worst-case constant time", journal = j-VLDB-J, volume = "30", number = "6", pages = "933--957", month = nov, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00668-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00668-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2021:HCE, author = "Ji Zhang and Ke Zhou and Jiashu Xing", title = "{CDBTune}$^+$: an efficient deep reinforcement learning-based 
automatic cloud database tuning system", journal = j-VLDB-J, volume = "30", number = "6", pages = "959--987", month = nov, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00670-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00670-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2021:EBS, author = "Hanzhi Wang and Zhewei Wei and Ji-Rong Wen", title = "{ExactSim}: benchmarking single-source {SimRank} algorithms with high-precision ground truths", journal = j-VLDB-J, volume = "30", number = "6", pages = "989--1015", month = nov, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00672-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00672-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Forresi:2021:DBF, author = "Chiara Forresi and Enrico Gallinucci and Hamdi {Ben Hamadou}", title = "A dataspace-based framework for {OLAP} analyses in a high-variety multistore", journal = j-VLDB-J, volume = "30", number = "6", pages = "1017--1040", month = nov, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00682-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = 
"https://link.springer.com/article/10.1007/s00778-021-00682-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Peng:2021:FDS, author = "Botao Peng and Panagiota Fatourou and Themis Palpanas", title = "Fast data series indexing for in-memory data", journal = j-VLDB-J, volume = "30", number = "6", pages = "1041--1067", month = nov, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00677-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00677-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wei:2021:ADE, author = "Ziheng Wei and Sven Hartmann and Sebastian Link", title = "Algorithms for the discovery of embedded functional dependencies", journal = j-VLDB-J, volume = "30", number = "6", pages = "1069--1093", month = nov, year = "2021", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00684-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Apr 14 14:19:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00684-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kossmann:2022:DDQ, author = "Jan Kossmann and Thorsten Papenbrock and Felix Naumann", title = "Data dependencies for query optimization: a survey", journal = j-VLDB-J, volume = "31", number = "1", pages = "1--22", month = jan, 
year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00676-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 15 07:02:55 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See correction \cite{Kossmann:2023:CDD}.", URL = "https://link.springer.com/article/10.1007/s00778-021-00676-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhu:2022:PSA, author = "Yifan Zhu and Lu Chen and Christian S. Jensen", title = "Pivot selection algorithms in metric spaces: a survey and experimental study", journal = j-VLDB-J, volume = "31", number = "1", pages = "23--47", month = jan, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00691-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 15 07:02:55 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00691-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Schmidl:2022:EDD, author = "Sebastian Schmidl and Thorsten Papenbrock", title = "Efficient distributed discovery of bidirectional order dependencies", journal = j-VLDB-J, volume = "31", number = "1", pages = "49--74", month = jan, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00683-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 15 07:02:55 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00683-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very 
Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Dignos:2022:LRJ, author = "Anton Dign{\"o}s and Michael H. B{\"o}hlen and Peter Moser", title = "Leveraging range joins for the computation of overlap joins", journal = j-VLDB-J, volume = "31", number = "1", pages = "75--99", month = jan, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00692-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 15 07:02:55 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00692-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Peng:2022:ARE, author = "You Peng and Xuemin Lin and Lu Qin", title = "Answering reachability and {$K$}-reach queries on large graphs with label constraints", journal = j-VLDB-J, volume = "31", number = "1", pages = "101--127", month = jan, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00695-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 15 07:02:55 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00695-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2022:DLP, author = "Wentao Li and Miao Qiao and Xuemin Lin", title = "Distance labeling: on parallelism, compression, and ordering", journal = j-VLDB-J, volume = "31", number = "1", pages = "129--155", month = jan, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00694-1", ISSN = "1066-8888 (print), 0949-877X 
(electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 15 07:02:55 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00694-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Alevizos:2022:CEF, author = "Elias Alevizos and Alexander Artikis and Georgios Paliouras", title = "Complex event forecasting with prediction suffix trees", journal = j-VLDB-J, volume = "31", number = "1", pages = "157--180", month = jan, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00698-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 15 07:02:55 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00698-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Snodgrass:2022:QOH, author = "Richard T. 
Snodgrass and Sabah Currim and Young-Kyoon Suh", title = "Have query optimizers hit the wall?", journal = j-VLDB-J, volume = "31", number = "1", pages = "181--200", month = jan, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00689-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 15 07:02:55 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00689-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bonifati:2022:SIB, author = "Angela Bonifati and Hannes Voigt", title = "Special issue on big graph data management and processing", journal = j-VLDB-J, volume = "31", number = "2", pages = "201--202", month = mar, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00732-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 16 07:47:28 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00732-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2022:TES, author = "Kai Wang and Xuemin Lin and Ying Zhang", title = "Towards efficient solutions of bitruss decomposition for large-scale bipartite graphs", journal = j-VLDB-J, volume = "31", number = "2", pages = "203--226", month = mar, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00658-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 16 07:47:28 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = 
"https://link.springer.com/article/10.1007/s00778-021-00658-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Linghu:2022:ACE, author = "Qingyuan Linghu and Fan Zhang and Ying Zhang", title = "Anchored coreness: efficient reinforcement of social networks", journal = j-VLDB-J, volume = "31", number = "2", pages = "227--252", month = mar, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00673-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 16 07:47:28 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00673-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yan:2022:PPF, author = "Da Yan and Wenwen Qu and Yang Zhou", title = "{PrefixFPM}: a parallel framework for general-purpose mining of frequent and closed patterns", journal = j-VLDB-J, volume = "31", number = "2", pages = "253--286", month = mar, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00687-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 16 07:47:28 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00687-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yan:2022:GTG, author = "Da Yan and Guimu Guo and John C. S. 
Lui", title = "{G-thinker}: a general distributed framework for finding qualified subgraphs in a big graph with load balancing", journal = j-VLDB-J, volume = "31", number = "2", pages = "287--320", month = mar, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00688-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 16 07:47:28 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00688-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mohamed:2022:RKG, author = "Aisha Mohamed and Ghadeer Abuoda and Ashraf Aboulnaga", title = "{RDFFrames}: knowledge graph access for machine learning tools", journal = j-VLDB-J, volume = "31", number = "2", pages = "321--346", month = mar, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00690-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 16 07:47:28 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00690-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Sagi:2022:DSR, author = "Tomer Sagi and Matteo Lissandrini and Katja Hose", title = "A design space for {RDF} data representations", journal = j-VLDB-J, volume = "31", number = "2", pages = "347--373", month = mar, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00725-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 16 07:47:28 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = 
"https://link.springer.com/article/10.1007/s00778-021-00725-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Rost:2022:DTG, author = "Christopher Rost and Kevin Gomez and Erhard Rahm", title = "Distributed temporal graph analytics with {GRADOOP}", journal = j-VLDB-J, volume = "31", number = "2", pages = "375--401", month = mar, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00667-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 16 07:47:28 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00667-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bevilacqua:2022:FME, author = "Glenn S. Bevilacqua and Laks V. S. 
Lakshmanan", title = "A fractional memory-efficient approach for online continuous-time influence maximization", journal = j-VLDB-J, volume = "31", number = "2", pages = "403--429", month = mar, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00679-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Apr 16 07:47:28 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00679-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", }

%%% Note: the compound family name {Ngonga Ngomo} is braced so that
%%% BibTeX keeps it together as the Last part of the name instead of
%%% taking only the final word, matching {Ben Hamadou} and
%%% {De Capitani di Vimercati} elsewhere in this file.
@Article{Ali:2022:SRS,
  author =          "Waqas Ali and Muhammad Saleem and Axel-Cyrille {Ngonga Ngomo}",
  title =           "A survey of {RDF} stores \& {SPARQL} engines for querying knowledge graphs",
  journal =         j-VLDB-J,
  volume =          "31",
  number =          "3",
  pages =           "1--26",
  month =           may,
  year =            "2022",
  CODEN =           "VLDBFR",
  DOI =             "https://doi.org/10.1007/s00778-021-00711-3",
  ISSN =            "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =          "1066-8888",
  bibdate =         "Fri May 6 07:32:10 MDT 2022",
  bibsource =       "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =             "https://link.springer.com/article/10.1007/s00778-021-00711-3",
  acknowledgement = ack-nhfb,
  ajournal =        "VLDB J.",
  fjournal =        "VLDB Journal: Very Large Data Bases",
  journal-URL =     "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Pitoura:2022:FRR, author = "Evaggelia Pitoura and Kostas Stefanidis and Georgia Koutrika", title = "Fairness in rankings and recommendations: an overview", journal = j-VLDB-J, volume = "31", number = "3", pages = "431--458", month = may, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00697-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri May 6 07:32:10 MDT 2022", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00697-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hidayat:2022:CMM, author = "Arif Hidayat and Muhammad Aamir Cheema and Ying Zhang", title = "Continuous monitoring of moving skyline and top-$k$ queries", journal = j-VLDB-J, volume = "31", number = "3", pages = "459--482", month = may, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00702-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri May 6 07:32:10 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00702-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Farhan:2022:FFD, author = "Muhammad Farhan and Qing Wang and Brendan McKay", title = "Fast fully dynamic labelling for distance queries", journal = j-VLDB-J, volume = "31", number = "3", pages = "483--506", month = may, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00707-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri May 6 07:32:10 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00707-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhao:2022:RCS, author = "Tianyu Zhao and Shuai Huang and Guoliang Li", title = "{RNE}: computing shortest paths using road network embedding", journal = j-VLDB-J, volume 
= "31", number = "3", pages = "507--528", month = may, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00705-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri May 6 07:32:10 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00705-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lai:2022:AMW, author = "Zhuohang Lai and Xibo Sun and Xiaolong Xie", title = "Accelerating multi-way joins on the {GPU}", journal = j-VLDB-J, volume = "31", number = "3", pages = "529--553", month = may, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00708-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri May 6 07:32:10 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00708-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{diVimercati:2022:AMQ, author = "Sabrina {De Capitani di Vimercati} and Sara Foresti and Pierangela Samarati", title = "An authorization model for query execution in the cloud", journal = j-VLDB-J, volume = "31", number = "3", pages = "555--579", month = may, year = "2022", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00709-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri May 6 07:32:10 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-021-00709-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data 
Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Huang:2022:PEG,
  author =       "Kai Huang and Haibo Hu and Xiaofang Zhou",
  title =        "Privacy and efficiency guaranteed social subgraph matching",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "3",
  pages =        "581--602",
  month =        may,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00706-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 6 07:32:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00706-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Wen:2022:SRQ,
  author =       "Dong Wen and Bohua Yang and Wenjie Zhang",
  title =        "Span-reachability querying in large temporal graphs",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "629--647",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00715-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00715-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Khalil:2022:PML,
  author =       "Jalal Khalil and Da Yan and Lyuheng Yuan",
  title =        "Parallel mining of large maximal quasi-cliques",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "649--674",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00712-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT
2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00712-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Kellou-Menouer:2022:SSS,
  author =       "Kenza Kellou-Menouer and Nikolaos Kardoulakis and Haridimos Kondylakis",
  title =        "A survey on semantic schema discovery",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "675--710",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00717-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00717-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Fritz:2022:EEC,
  author =       "Manuel Fritz and Michael Behringer and Holger Schwarz",
  title =        "Efficient exploratory clustering analyses in large-scale exploration processes",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "711--732",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00716-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00716-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zheng:2022:PPW,
  author =       "Libin Zheng and Lei Chen and Peng Cheng",
  title =        "Privacy-preserving worker allocation
in crowdsourcing",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "733--751",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00713-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00713-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Qin:2022:IDR,
  author =       "Xuedi Qin and Chengliang Chai and Mourad Ouzzani",
  title =        "Interactively discovering and ranking desired tuples by data exploration",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "753--777",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00714-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00714-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zhu:2022:OPP,
  author =       "Yuqing Zhu and Jing Tang and Xueyan Tang",
  title =        "Optimal price profile for influential nodes in online social networks",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "779--795",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00727-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00727-9",
  acknowledgement = ack-nhfb,
ajournal = "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Amagata:2022:FEP,
  author =       "Daichi Amagata and Makoto Onizuka and Takahiro Hara",
  title =        "Fast, exact, and parallel-friendly outlier detection algorithms with proximity graph in metric spaces",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "797--821",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00729-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00729-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Huang:2022:SIR,
  author =       "Zi Huang and Yanyan Shen and Divesh Srivastava",
  title =        "Special issue on responsible data management and data science",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "823--823",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00761-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00761-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Li:2022:AOD,
  author =       "Pei Li and Jaroslaw Szlichta and Divesh Srivastava",
  title =        "{ABC} of order dependencies",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "825--849",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00696-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00696-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Principe:2022:AHS,
  author =       "Renzo Arturo Alva Principe and Andrea Maurino and Blerina Spahiu",
  title =        "{ABSTAT-HD}: a scalable tool for profiling very large knowledge graphs",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "851--876",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00704-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00704-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Wang:2022:FAP,
  author =       "Qinyong Wang and Hongzhi Yin and Xiangliang Zhang",
  title =        "Fast-adapting and privacy-preserving federated recommender system",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "877--896",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00700-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00700-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
https://link.springer.com/journal/778",
}

@Article{Xiang:2022:GGG,
  author =       "Sheng Xiang and Dong Wen and Xuemin Lin",
  title =        "General graph generators: experiments, analyses, and improvements",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "897--925",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00701-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00701-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Liu:2022:PGA,
  author =       "Zifan Liu and Zhechun Zhou and Theodoros Rekatsinas",
  title =        "Picket: guarding against corrupted data in tabular data during learning and inference",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "927--955",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00699-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00699-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Ge:2022:MMD,
  author =       "Yong-Feng Ge and Maria Orlowska and Yanchun Zhang",
  title =        "{MDDE}: multitasking distributed differential evolution for privacy-preserving database fragmentation",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "957--975",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00718-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
ISSN-L = "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00718-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Panjei:2022:SOE,
  author =       "Egawati Panjei and Le Gruenwald and Shejuti Silvia",
  title =        "A survey on outlier explanations",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "977--1008",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00721-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00721-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zeng:2022:EAS,
  author =       "Weixin Zeng and Xiang Zhao and Wei Wang",
  title =        "On entity alignment at scale",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1009--1033",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00703-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00703-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Xu:2022:PBP,
  author =       "Qingyu Xu and Feng Zhang and Xiaoyong Du",
  title =        "Payment behavior prediction on shared parking lots with {TR-GCN}",
  journal =
j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1035--1058",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00722-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00722-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Sadiq:2022:IRN,
  author =       "Shazia Sadiq and Amir Aryani and Xiaofang Zhou",
  title =        "Information Resilience: the nexus of responsible and agile approaches to information use",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1059--1084",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00720-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00720-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Liu:2022:ECR,
  author =       "Fanzhen Liu and Zhao Li and Quan Z.
Sheng",
  title =        "{eRiskCom}: an e-commerce risky community detection platform",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1085--1101",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00723-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00723-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Grafberger:2022:DDD,
  author =       "Stefan Grafberger and Paul Groth and Sebastian Schelter",
  title =        "Data distribution debugging in machine learning pipelines",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1103--1126",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00726-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00726-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Li:2022:DTA,
  author =       "Qian Li and Zhichao Wang and Guandong Xu",
  title =        "Deep treatment-adaptive network for causal inference",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1127--1142",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00724-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00724-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zhang:2022:BCS,
  author =       "Rui Zhang and Bayu Distiawan Trisedya and Jianzhong Qi",
  title =        "A benchmark and comprehensive survey on knowledge graph entity alignment via representation learning",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1143--1168",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00747-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00747-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Porobic:2022:SIB,
  author =       "Danica Porobic",
  title =        "Special issue on the best papers of {DaMoN 2020}",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1169--1169",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00766-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00766-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Funke:2022:LLQ,
  author =       "Henning Funke and Jan M{\"u}hlig and Jens Teubner",
  title =        "Low-latency query compilation",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1171--1184",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00741-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00741-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Bang:2022:FSC,
  author =       "Tiemo Bang and Norman May and Ilia Petrov and Carsten Binnig",
  title =        "The full story of 1000 cores",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1185--1213",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00742-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00742-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Pietrzyk:2022:SSV,
  author =       "Johannes Pietrzyk and Alexander Krause and Dirk Habich and Wolfgang Lehner",
  title =        "To share or not to share vector registers?",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1215--1236",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00744-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00744-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb;
https://link.springer.com/journal/778",
}

@Article{Balazinska:2022:EV,
  author =       "Magdalena Balazinska and Xiaofang Zhou",
  title =        "Editorial for {S.I.}: {VLDB 2020}",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1237--1238",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00734-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00734-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Huang:2022:OOC,
  author =       "Yihe Huang and William Qian and Eddie Kohler and Barbara Liskov and Liuba Shrira",
  title =        "Opportunities for optimism in contended main-memory multicore transactions",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1239--1261",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00719-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00719-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Kandula:2022:DIP,
  author =       "Srikanth Kandula and Laurel Orr and Surajit Chaudhuri",
  title =        "Data-induced predicates for sideways information passing in query optimizers",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1263--1290",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00693-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00693-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Herlihy:2022:CCD,
  author =       "Maurice Herlihy and Barbara Liskov and Liuba Shrira",
  title =        "Cross-chain deals and adversarial commerce",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1291--1309",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00686-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00686-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Li:2022:AAC,
  author =       "Yuanbing Li and Xian Wu and Yifei Jin and Jian Li and Guoliang Li and Jianhua Feng",
  title =        "Adaptive algorithms for crowd-aided categorization",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1311--1337",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00685-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00685-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zheng:2022:PLF,
  author =       "Bolong Zheng and Xi Zhao and Lianggui Weng and Quoc Viet
Hung Nguyen and Hang Liu and Christian S. Jensen",
  title =        "{PM-LSH}: a fast and accurate in-memory framework for high-dimensional approximate {NN} and closest pair search",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1339--1363",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00680-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00680-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Lyu:2022:MTE,
  author =       "Bingqing Lyu and Lu Qin and Xuemin Lin and Ying Zhang and Zhengping Qian and Jingren Zhou",
  title =        "Maximum and top-$k$ diversified biclique search at scale",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1365--1389",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00681-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00681-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Chen:2023:HHF,
  author =       "Wei Chen and Weiqing Wang and Hongzhi Yin and Lei Zhao and Xiaofang Zhou",
  title =        "{HFUL}: a hybrid framework for user account linkage across location-aware social networks",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "1--22",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00730-8",
  ISSN =         "1066-8888 (print), 0949-877X
(electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00730-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Ding:2023:FGE,
  author =       "Zeyu Ding and Yuxin Wang and Yingtai Xiao and Guanhong Wang and Danfeng Zhang and Daniel Kifer",
  title =        "Free gap estimates from the exponential mechanism, sparse vector, noisy max and related algorithms",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "23--48",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00728-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00728-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Fan:2023:MGC,
  author =       "Wenfei Fan and Yuanhao Li and Muyang Liu and Can Lu",
  title =        "Making graphs compact by lossless contraction",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "49--73",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00731-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00731-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}
@Article{Lourenco:2023:BID, author = "Raoni Louren{\c{c}}o and Juliana Freire and Eric Simon and Gabriel Weber and Dennis Shasha", title = "{BugDoc}: Iterative debugging and explanation of pipeline executions", journal = j-VLDB-J, volume = "32", number = "1", pages = "75--101", month = jan, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00733-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See correction \cite{Lourenco:2023:CBI}.", URL = "https://link.springer.com/article/10.1007/s00778-022-00733-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Pankowski:2023:ODF, author = "Tadeusz Pankowski", title = "Ontological databases with faceted queries", journal = j-VLDB-J, volume = "32", number = "1", pages = "103--121", month = jan, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00735-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00735-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Liu:2023:ZRN, author = "Gang Liu and Leying Chen and Shimin Chen", title = "{Zen+}: a robust {NUMA}-aware {OLTP} engine optimized for non-volatile main memory", journal = j-VLDB-J, volume = "32", number = "1", pages = "123--148", month = jan, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00737-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25
08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00737-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Fan:2023:ADG, author = "Wenfei Fan and Ruiqi Xu and Qiang Yin and Wenyuan Yu and Jingren Zhou", title = "Application-driven graph partitioning", journal = j-VLDB-J, volume = "32", number = "1", pages = "149--172", month = jan, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00736-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00736-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Miao:2023:AIR, author = "Dongjing Miao and Pengfei Zhang and Jianzhong Li and Ye Wang and Zhipeng Cai", title = "Approximation and inapproximability results on computing optimal repairs", journal = j-VLDB-J, volume = "32", number = "1", pages = "173--197", month = jan, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00738-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00738-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Maroulis:2023:RAA, author = "Stavros Maroulis and Nikos Bikakis and George Papastefanatos and 
Panos Vassiliadis and Yannis Vassiliou", title = "Resource-aware adaptive indexing for in situ visual exploration and analytics", journal = j-VLDB-J, volume = "32", number = "1", pages = "199--227", month = jan, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00739-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00739-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Huang:2023:DEM, author = "Jiacheng Huang and Wei Hu and Zhifeng Bao and Qijin Chen and Yuzhong Qu", title = "Deep entity matching with adversarial active learning", journal = j-VLDB-J, volume = "32", number = "1", pages = "229--255", month = jan, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00745-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00745-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2023:ABC, author = "Kai Wang and Xuemin Lin and Lu Qin and Wenjie Zhang and Ying Zhang", title = "Accelerated butterfly counting with vertex priority on bipartite graphs", journal = j-VLDB-J, volume = "32", number = "2", pages = "257--281", month = mar, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00746-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00746-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Nikookar:2023:DRS, author = "Sepideh Nikookar and Mohammadreza Esfandiari and Ria Mae Borromeo and Paras Sakharkar and Sihem Amer-Yahia and Senjuti Basu Roy", title = "Diversifying recommendations on sequences of sets", journal = j-VLDB-J, volume = "32", number = "2", pages = "283--304", month = mar, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00740-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00740-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Piai:2023:FGS, author = "Federico Piai and Paolo Atzeni and Paolo Merialdo and Divesh Srivastava", title = "Fine-grained semantic type discovery for heterogeneous sources using clustering", journal = j-VLDB-J, volume = "32", number = "2", pages = "305--324", month = mar, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00743-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00743-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Liu:2023:URR, author = "Hao Liu and Jindong Han and 
Yanjie Fu and Yanyan Li and Kai Chen and Hui Xiong", title = "Unified route representation learning for multi-modal transportation recommendation with spatiotemporal pre-training", journal = j-VLDB-J, volume = "32", number = "2", pages = "325--342", month = mar, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00748-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00748-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kim:2023:FSQ, author = "Hyunjoon Kim and Yunyoung Choi and Kunsoo Park and Xuemin Lin and Seok-Hee Hong and Wook-Shin Han", title = "Fast subgraph query processing and subgraph matching via static and dynamic equivalences", journal = j-VLDB-J, volume = "32", number = "2", pages = "343--368", month = mar, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00749-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00749-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Nguyen:2023:DRL, author = "Thanh Tam Nguyen and Thanh Trung Huynh and Hongzhi Yin and Matthias Weidlich and Thanh Thi Nguyen and Thai Son Mai and Quoc Viet Hung Nguyen", title = "Detecting rumours with latency guarantees using massive streaming data", journal = j-VLDB-J, volume = "32", number = "2", pages = "369--387", month = mar, year = "2023", CODEN = "VLDBFR", DOI = 
"https://doi.org/10.1007/s00778-022-00750-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00750-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2023:VSE, author = "Yang Li and Yu Shen and Wentao Zhang and Ce Zhang and Bin Cui", title = "{VolcanoML}: speeding up end-to-end {AutoML} via scalable search space decomposition", journal = j-VLDB-J, volume = "32", number = "2", pages = "389--413", month = mar, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00752-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00752-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Bouganim:2023:HDP, author = "Luc Bouganim and Julien Loudet and Iulian Sandu Popa", title = "Highly distributed and privacy-preserving queries on personal data management systems", journal = j-VLDB-J, volume = "32", number = "2", pages = "415--445", month = mar, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00753-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00753-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = 
"https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chen:2023:IGL, author = "Jiazun Chen and Jun Gao and Bin Cui", title = "{ICS-GNN$^+$}: lightweight interactive community search via graph neural network", journal = j-VLDB-J, volume = "32", number = "2", pages = "447--467", month = mar, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00754-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00754-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Boniol:2023:CUS, author = "Paul Boniol and Michele Linardi and Federico Roncallo and Themis Palpanas and Mohammed Meftah and Emmanuel Remy", title = "Correction to: {Unsupervised} and scalable subsequence anomaly detection in large data series", journal = j-VLDB-J, volume = "32", number = "2", pages = "469--469", month = mar, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00678-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See \cite{Boniol:2021:USS}.", URL = "https://link.springer.com/article/10.1007/s00778-021-00678-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kossmann:2023:CDD, author = "Jan Kossmann and Thorsten Papenbrock and Felix Naumann", title = "Correction to: Data dependencies for query optimization: a survey", journal = j-VLDB-J, volume = "32", number = "2", pages = "471--471", month = mar, year = 
"2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-021-00710-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See \cite{Kossmann:2022:DDQ}.", URL = "https://link.springer.com/article/10.1007/s00778-021-00710-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lourenco:2023:CBI, author = "Raoni Louren{\c{c}}o and Juliana Freire and Eric Simon and Gabriel Weber and Dennis Shasha", title = "Correction to: {BugDoc} Iterative debugging and explanation of pipeline executions", journal = j-VLDB-J, volume = "32", number = "2", pages = "473--473", month = mar, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00751-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 25 08:12:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See \cite{Lourenco:2023:BID}.", URL = "https://link.springer.com/article/10.1007/s00778-022-00751-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Azzalini:2023:EDA, author = "Fabio Azzalini and Davide Piantella and Emanuele Rabosio and Letizia Tanca", title = "Enhancing domain-aware multi-truth data fusion using copy-based source authority and value similarity", journal = j-VLDB-J, volume = "32", number = "3", pages = "475--500", month = may, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00757-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 21 10:46:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = 
"https://link.springer.com/article/10.1007/s00778-022-00757-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ahmed:2023:RST, author = "Pritom Ahmed and Ahmed Eldawy and Vagelis Hristidis and Vassilis J. Tsotras", title = "Reverse spatial top-$k$ keyword queries", journal = j-VLDB-J, volume = "32", number = "3", pages = "501--524", month = may, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00759-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 21 10:46:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00759-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2023:POB, author = "Meng Li and Rongbiao Xie and Deyi Chen and Haipeng Dai and Rong Gu and He Huang and Wanchun Dou and Guihai Chen", title = "A {Pareto} optimal {Bloom} filter family with hash adaptivity", journal = j-VLDB-J, volume = "32", number = "3", pages = "525--548", month = may, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00755-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 21 10:46:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00755-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lei:2023:HDP, author = "Chuan Lei and Abdul Quamar and Vasilis Efthymiou and Fatma {\"O}zcan and Rana Alotaibi", title = "{HERMES}: data placement and 
schema optimization for enterprise knowledge bases", journal = j-VLDB-J, volume = "32", number = "3", pages = "549--574", month = may, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00756-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 21 10:46:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00756-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2023:ENQ, author = "Jiajia Li and Cancan Ni and Dan He and Lei Li and Xiufeng Xia and Xiaofang Zhou", title = "Efficient $k$ {NN} query for moving objects on time-dependent road networks", journal = j-VLDB-J, volume = "32", number = "3", pages = "575--594", month = may, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00758-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 21 10:46:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00758-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Liu:2023:MCS, author = "Ziyi Liu and Lei Li and Mengxuan Zhang and Wen Hua and Xiaofang Zhou", title = "Multi-constraint shortest path using forest hop labeling", journal = j-VLDB-J, volume = "32", number = "3", pages = "595--621", month = may, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00760-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 21 10:46:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = 
"https://link.springer.com/article/10.1007/s00778-022-00760-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2023:LBQ, author = "Pengcheng Zhang and Bin Yao and Chao Gao and Bin Wu and Xiao He and Feifei Li and Yuanfei Lu and Chaoqun Zhan and Feilong Tang", title = "Learning-based query optimization for multi-probe approximate nearest neighbor search", journal = j-VLDB-J, volume = "32", number = "3", pages = "623--645", month = may, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00762-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 21 10:46:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00762-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Luo:2023:TMH, author = "Qi Luo and Dongxiao Yu and Zhipeng Cai and Xuemin Lin and Guanghui Wang and Xiuzhen Cheng", title = "Toward maintenance of hypercores in large-scale dynamic hypergraphs", journal = j-VLDB-J, volume = "32", number = "3", pages = "647--664", month = may, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00763-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 21 10:46:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00763-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2023:PPB, author = "Liang Zhang and Noura Alghamdi and Huayi 
Zhang and Mohamed Y. Eltabakh and Elke A. Rundensteiner", title = "{PARROT}: pattern-based correlation exploitation in big partitioned data series", journal = j-VLDB-J, volume = "32", number = "3", pages = "665--688", month = may, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00767-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 21 10:46:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00767-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wellenzohn:2023:RSC, author = "Kevin Wellenzohn and Michael H. B{\"o}hlen and Sven Helmer and Antoine Pietri and Stefano Zacchiroli", title = "Robust and scalable content-and-structure indexing", journal = j-VLDB-J, volume = "32", number = "4", pages = "689--715", month = jul, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00764-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jun 1 08:33:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00764-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Miao:2023:PPP, author = "Xupeng Miao and Wentao Zhang and Yuezihan Jiang and Fangcheng Fu and Yingxia Shao and Lei Chen and Yangyu Tao and Gang Cao and Bin Cui", title = "{P$^2$CG}: a privacy preserving collaborative graph neural network training framework", journal = j-VLDB-J, volume = "32", number = "4", pages = "717--736", month = jul, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00768-8", ISSN = 
"1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jun 1 08:33:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00768-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Islam:2023:GFE, author = "Md Mouinul Islam and Mahsa Asadi and Sihem Amer-Yahia and Senjuti Basu Roy", title = "A generic framework for efficient computation of top-$k$ diverse results", journal = j-VLDB-J, volume = "32", number = "4", pages = "737--761", month = jul, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00770-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jun 1 08:33:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00770-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Echihabi:2023:PDS, author = "Karima Echihabi and Theophanis Tsandilas and Anna Gogolou and Anastasia Bezerianos and Themis Palpanas", title = "{ProS}: data series progressive $k$-{NN} similarity search and classification with probabilistic quality guarantees", journal = j-VLDB-J, volume = "32", number = "4", pages = "763--789", month = jul, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00771-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jun 1 08:33:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00771-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large 
Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Whang:2023:DCQ, author = "Steven Euijong Whang and Yuji Roh and Hwanjun Song and Jae-Gil Lee", title = "Data collection and quality challenges in deep learning: a data-centric {AI} perspective", journal = j-VLDB-J, volume = "32", number = "4", pages = "791--813", month = jul, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00775-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jun 1 08:33:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00775-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lou:2023:TTA, author = "Yunkai Lou and Chaokun Wang and Tiankai Gu and Hao Feng and Jun Chen and Jeffrey Xu Yu", title = "Time-topology analysis on temporal graphs", journal = j-VLDB-J, volume = "32", number = "4", pages = "815--843", month = jul, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00772-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jun 1 08:33:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00772-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ntroumpogiannis:2023:MLA, author = "Antonios Ntroumpogiannis and Michail Giannoulis and Nikolaos Myrtakis and Vassilis Christophides and Eric Simon and Ioannis Tsamardinos", title = "A meta-level analysis of online anomaly detectors", journal = j-VLDB-J, volume = "32", number = "4", pages = "845--886", month 
= jul, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00773-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jun 1 08:33:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00773-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2023:SSQ, author = "Dongxiang Zhang and Zhihao Chang and Dingyu Yang and Dongsheng Li and Kian-Lee Tan and Ke Chen and Gang Chen", title = "{SQUID}: subtrajectory query in trillion-scale {GPS} database", journal = j-VLDB-J, volume = "32", number = "4", pages = "887--904", month = jul, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00777-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jun 1 08:33:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00777-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Katsogiannis-Meimarakis:2023:SDL, author = "George Katsogiannis-Meimarakis and Georgia Koutrika", title = "A survey on deep learning approaches for text-to-{SQL}", journal = j-VLDB-J, volume = "32", number = "4", pages = "905--936", month = jul, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00776-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Thu Jun 1 08:33:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00776-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = 
"VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhao:2023:LSS, author = "Kangfei Zhao and Jeffrey Xu Yu and Qiyan Li and Hao Zhang and Yu Rong", title = "Learned sketch for subgraph counting: a holistic approach", journal = j-VLDB-J, volume = "32", number = "5", pages = "937--962", month = sep, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00781-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Aug 18 07:36:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00781-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yamada:2023:ALT, author = "Masaya Yamada and Hiroyuki Kitagawa and Toshiyuki Amagasa and Akiyoshi Matono", title = "Augmented lineage: traceability of data analysis including complex {UDF} processing", journal = j-VLDB-J, volume = "32", number = "5", pages = "963--983", month = sep, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00769-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Aug 18 07:36:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00769-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Verwiebe:2023:SWT, author = "Juliane Verwiebe and Philipp M. 
Grulich and Jonas Traub and Volker Markl", title = "Survey of window types for aggregation in stream processing systems", journal = j-VLDB-J, volume = "32", number = "5", pages = "985--1011", month = sep, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00778-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Aug 18 07:36:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See \cite{Verwiebe:2024:CSW}.", URL = "https://link.springer.com/article/10.1007/s00778-022-00778-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhou:2023:BCB, author = "Alexander Zhou and Yue Wang and Lei Chen", title = "Butterfly counting and bitruss decomposition on uncertain bipartite graphs", journal = j-VLDB-J, volume = "32", number = "5", pages = "1013--1036", month = sep, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00782-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Aug 18 07:36:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00782-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hirsch:2023:EDK, author = "Vitali Hirsch and Peter Reimann and Dennis Treder-Tschechlov and Holger Schwarz and Bernhard Mitschang", title = "Exploiting domain knowledge to address class imbalance and a heterogeneous feature space in multi-class classification", journal = j-VLDB-J, volume = "32", number = "5", pages = "1037--1064", month = sep, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00780-6", ISSN = "1066-8888 (print), 
0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Aug 18 07:36:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00780-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xu:2023:LUI, author = "Jia Xu and Zulong Chen and Wanjie Tao and Ziyi Wang and Detao Lv and Yao Yu and Chuanfei Xu", title = "Leveraging user itinerary to improve personalized deep matching at {Fliggy}", journal = j-VLDB-J, volume = "32", number = "5", pages = "1065--1086", month = sep, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00787-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Aug 18 07:36:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00787-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gou:2023:SWB, author = "Xiangyang Gou and Lei Zou", title = "Sliding window-based approximate triangle counting with bounded memory usage", journal = j-VLDB-J, volume = "32", number = "5", pages = "1087--1110", month = sep, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00783-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Aug 18 07:36:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00783-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } 
@Article{Schiavio:2023:DDQ, author = "Filippo Schiavio and Daniele Bonetta and Walter Binder", title = "{DynQ}: a dynamic query engine with query-reuse capabilities embedded in a polyglot runtime", journal = j-VLDB-J, volume = "32", number = "5", pages = "1111--1135", month = sep, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00784-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Aug 18 07:36:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00784-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yang:2023:BCE, author = "Jianye Yang and Yun Peng and Dian Ouyang and Wenjie Zhang and Xuemin Lin and Xiang Zhao", title = "$ (p, q)$-biclique counting and enumeration for large sparse bipartite graphs", journal = j-VLDB-J, volume = "32", number = "5", pages = "1137--1161", month = sep, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00786-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Aug 18 07:36:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00786-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Naumann:2023:ESI, author = "Felix Naumann and Xin Luna Dong", title = "Editorial: Special Issue for Selected Papers of {VLDB 2021}", journal = j-VLDB-J, volume = "32", number = "6", pages = "1163--1163", month = nov, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00792-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = 
"1066-8888", bibdate = "Sat Oct 21 08:56:16 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00792-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Fent:2023:PPE, author = "Philipp Fent and Altan Birler and Thomas Neumann", title = "Practical planning and execution of groupjoin and nested aggregates", journal = j-VLDB-J, volume = "32", number = "6", pages = "1165--1190", month = nov, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00765-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Oct 21 08:56:16 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00765-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Farias:2023:LDD, author = "Victor A. E. Farias and Felipe T. Brito and Cheryl Flynn and Javam C. 
Machado and Subhabrata Majumdar and Divesh Srivastava", title = "Local dampening: differential privacy for non-numeric queries via local sensitivity", journal = j-VLDB-J, volume = "32", number = "6", pages = "1191--1214", month = nov, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-022-00774-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Oct 21 08:56:16 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-022-00774-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Li:2023:EEM, author = "Yuliang Li and Jinfeng Li and Yoshi Suhara and AnHai Doan and Wang-Chiew Tan", title = "Effective entity matching with transformers", journal = j-VLDB-J, volume = "32", number = "6", pages = "1215--1235", month = nov, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00779-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Oct 21 08:56:16 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00779-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yang:2023:PSE, author = "Renchi Yang and Jieming Shi and Xiaokui Xiao and Yin Yang and Sourav S. 
Bhowmick and Juncheng Liu", title = "{PANE}: scalable and effective attributed network embedding", journal = j-VLDB-J, volume = "32", number = "6", pages = "1237--1262", month = nov, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00790-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Oct 21 08:56:16 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00790-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ouyang:2023:WHM, author = "Dian Ouyang and Dong Wen and Lu Qin and Lijun Chang and Xuemin Lin and Ying Zhang", title = "When hierarchy meets 2-hop-labeling: efficient shortest distance and path queries on road networks", journal = j-VLDB-J, volume = "32", number = "6", pages = "1263--1287", month = nov, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00789-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Oct 21 08:56:16 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00789-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Qian:2023:IDD, author = "Chaoqin Qian and Menglu Li and Zijing Tan and Ai Ran and Shuai Ma", title = "Incremental discovery of denial constraints", journal = j-VLDB-J, volume = "32", number = "6", pages = "1289--1313", month = nov, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00788-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Oct 21 08:56:16 MDT 2023", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00788-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2023:TGC, author = "Zuozhi Wang and Kai Zeng and Botong Huang and Wei Chen and Xiaozong Cui and Bo Wang and Ji Liu and Liya Fan and Dachuan Qu and Zhenyu Hou and Tao Guan and Chen Li and Jingren Zhou", title = "{Tempura}: a general cost-based optimizer framework for incremental data processing (Journal Version)", journal = j-VLDB-J, volume = "32", number = "6", pages = "1315--1342", month = nov, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00785-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Oct 21 08:56:16 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00785-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hellings:2023:BSB, author = "Jelle Hellings and Mohammad Sadoghi", title = "{ByShard}: sharding in a {Byzantine} environment", journal = j-VLDB-J, volume = "32", number = "6", pages = "1343--1367", month = nov, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00794-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Oct 21 08:56:16 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00794-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } 
@Article{Papadakis:2023:AOO, author = "George Papadakis and Vasilis Efthymiou and Emmanouil Thanos and Oktie Hassanzadeh and Peter Christen", title = "An analysis of one-to-one matching algorithms for entity resolution", journal = j-VLDB-J, volume = "32", number = "6", pages = "1369--1400", month = nov, year = "2023", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00791-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Oct 21 08:56:16 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00791-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2024:SDC, author = "Haoyu Wang and Aoqian Zhang and Shaoxu Song and Jianmin Wang", title = "Streaming data cleaning based on speed change", journal = j-VLDB-J, volume = "33", number = "1", pages = "1--24", month = jan, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00796-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00796-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Aghasadeghi:2024:TGP, author = "Amir Aghasadeghi and Jan {Van den Bussche} and Julia Stoyanovich", title = "Temporal graph patterns by timed automata", journal = j-VLDB-J, volume = "33", number = "1", pages = "25--47", month = jan, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00795-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 
08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00795-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chang:2024:NOA, author = "Lijun Chang and Zhiyi Wang", title = "A near-optimal approach to edge connectivity-based hierarchical graph decomposition", journal = j-VLDB-J, volume = "33", number = "1", pages = "49--71", month = jan, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00797-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00797-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Christodoulou:2024:HHI, author = "George Christodoulou and Panagiotis Bouros and Nikos Mamoulis", title = "{HINT}: a hierarchical interval index for {Allen} relationships", journal = j-VLDB-J, volume = "33", number = "1", pages = "73--100", month = jan, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00798-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00798-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Farhan:2024:BBD, author = "Muhammad Farhan and Henning Koehler and Qing Wang", title = "{BatchHL$^+$}: batch 
dynamic labelling for distance queries on large-scale networks", journal = j-VLDB-J, volume = "33", number = "1", pages = "101--129", month = jan, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00799-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00799-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Meilicke:2024:ABR, author = "Christian Meilicke and Melisachew Wudage Chekol and Patrick Betz and Manuel Fink and Heiner Stuckenschmidt", title = "Anytime bottom-up rule learning for large-scale knowledge graph completion", journal = j-VLDB-J, volume = "33", number = "1", pages = "131--161", month = jan, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00800-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See correction \cite{Meilicke:2025:CAB}.", URL = "https://link.springer.com/article/10.1007/s00778-023-00800-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhao:2024:CBT, author = "Yan Zhao and Kai Zheng and Ziwei Wang and Liwei Deng and Bin Yang and Torben Bach Pedersen and Christian S.
Jensen and Xiaofang Zhou", title = "Coalition-based task assignment with priority-aware fairness in spatial crowdsourcing", journal = j-VLDB-J, volume = "33", number = "1", pages = "163--184", month = jan, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00802-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00802-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Shaham:2024:SSD, author = "Sina Shaham and Gabriel Ghinita and Cyrus Shahabi", title = "Supporting secure dynamic alert zones using searchable encryption and graph embedding", journal = j-VLDB-J, volume = "33", number = "1", pages = "185--206", month = jan, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00803-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00803-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ma:2024:ADD, author = "Chenhao Ma and Yixiang Fang and Reynold Cheng and Laks V. S. 
Lakshmanan and Xiaolin Han and Xiaodong Li", title = "Accelerating directed densest subgraph queries with software and hardware approaches", journal = j-VLDB-J, volume = "33", number = "1", pages = "207--230", month = jan, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00805-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00805-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mouratidis:2024:QCD, author = "Kyriakos Mouratidis and Keming Li and Bo Tang", title = "Quantifying the competitiveness of a dataset in relation to general preferences", journal = j-VLDB-J, volume = "33", number = "1", pages = "231--250", month = jan, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00804-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00804-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Verwiebe:2024:CSW, author = "Juliane Verwiebe and Philipp M. 
Grulich and Jonas Traub and Volker Markl", title = "Correction to: {Survey} of window types for aggregation in stream processing systems", journal = j-VLDB-J, volume = "33", number = "1", pages = "251--251", month = jan, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00793-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See \cite{Verwiebe:2023:SWT}.", URL = "https://link.springer.com/article/10.1007/s00778-023-00793-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yang:2024:CTF, author = "Fan Yang and Faisal M. Almutairi and Hyun Ah Song and Christos Faloutsos and Nicholas D. Sidiropoulos and Vladimir Zadorozhny", title = "Correction to: {TurboLift}: fast accuracy lifting for historical data recovery", journal = j-VLDB-J, volume = "33", number = "1", pages = "253--253", month = jan, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00801-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See \cite{Yang:2020:TFA}.", URL = "https://link.springer.com/article/10.1007/s00778-023-00801-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Liu:2024:TDS, author = "Tongyu Liu and Ju Fan and Guoliang Li and Nan Tang and Xiaoyong Du", title = "Tabular data synthesis with generative adversarial networks: design space and optimizations", journal = j-VLDB-J, volume = "33", number = "2", pages = "255--280", month = mar, year = "2024", CODEN = "VLDBFR", DOI = 
"https://doi.org/10.1007/s00778-023-00807-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00807-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Karpov:2024:MFA, author = "Nikolai Karpov and Haoyu Zhang and Qin Zhang", title = "{MinJoin++}: a fast algorithm for string similarity joins under edit distance", journal = j-VLDB-J, volume = "33", number = "2", pages = "281--299", month = mar, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00806-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00806-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Usta:2024:XEN, author = "Arif Usta and Akifhan Karakayali and {\"O}zg{\"u}r Ulusoy", title = "{xDBTagger}: explainable natural language interface to databases using keyword mappings and schema graph", journal = j-VLDB-J, volume = "33", number = "2", pages = "301--321", month = mar, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00809-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00809-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = 
"https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2024:CEU, author = "Jiayi Wang and Chengliang Chai and Jiabin Liu and Guoliang Li", title = "Cardinality estimation using normalizing flow", journal = j-VLDB-J, volume = "33", number = "2", pages = "323--348", month = mar, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00808-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00808-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Arroyuelo:2024:ORC, author = "Diego Arroyuelo and Adri{\'a}n G{\'o}mez-Brand{\'o}n and Aidan Hogan and Gonzalo Navarro and Javiel Rojas-Ledesma", title = "Optimizing {RPQs} over a compact graph representation", journal = j-VLDB-J, volume = "33", number = "2", pages = "349--374", month = mar, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00811-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00811-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chen:2024:QEP, author = "Zhiwen Chen and Daokun Hu and Wenkui Che and Jianhua Sun and Hao Chen", title = "A quantitative evaluation of persistent memory hash indexes", journal = j-VLDB-J, volume = "33", number = "2", pages = "375--397", month = mar, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00812-1", ISSN 
= "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00812-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Abello:2024:EEM, author = "Alberto Abell{\'o} and James Cheney", title = "{Eris}: efficiently measuring discord in multidimensional sources", journal = j-VLDB-J, volume = "33", number = "2", pages = "399--423", month = mar, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00810-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00810-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Jiang:2024:SEM, author = "Jiawei Jiang and Shaoduo Gan and Bo Du and Gustavo Alonso and Ana Klimovic and Ankit Singla and Wentao Wu and Sheng Wang and Ce Zhang", title = "A systematic evaluation of machine learning on serverless infrastructure", journal = j-VLDB-J, volume = "33", number = "2", pages = "425--449", month = mar, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00813-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00813-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data 
Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2024:STS, author = "Shuhao Zhang and Juan Soto and Volker Markl", title = "A survey on transactional stream processing", journal = j-VLDB-J, volume = "33", number = "2", pages = "451--479", month = mar, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00814-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00814-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{dHondt:2024:EDM, author = "Jens E. d'Hondt and Koen Minartz and Odysseas Papapetrou", title = "Efficient detection of multivariate correlations with different correlation measures", journal = j-VLDB-J, volume = "33", number = "2", pages = "481--505", month = mar, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00815-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00815-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Fragkoulis:2024:SES, author = "Marios Fragkoulis and Paris Carbone and Vasiliki Kalavri and Asterios Katsifodimos", title = "A survey on the evolution of stream processing systems", journal = j-VLDB-J, volume = "33", number = "2", pages = "507--541", month = mar, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00819-8", ISSN = 
"1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00819-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhao:2024:RRE, author = "Hongyao Zhao and Jingyao Li and Wei Lu and Qian Zhang and Wanqing Yang and Jiajia Zhong and Meihui Zhang and Haixiang Li and Xiaoyong Du and Anqun Pan", title = "{RCBench}: an {RDMA}-enabled transaction framework for analyzing concurrency control algorithms", journal = j-VLDB-J, volume = "33", number = "2", pages = "543--567", month = mar, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00821-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 19 08:11:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00821-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2024:LCS, author = "Junhua Zhang and Long Yuan and Wentao Li and Lu Qin and Ying Zhang and Wenjie Zhang", title = "Label-constrained shortest path query processing on road networks", journal = j-VLDB-J, volume = "33", number = "3", pages = "569--593", month = may, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00825-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00825-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = 
"VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Fang:2024:NWC, author = "James Fang and Dmitry Lychagin and Michael J. Carey and Vassilis J. Tsotras", title = "A new window clause for {SQL++}", journal = j-VLDB-J, volume = "33", number = "3", pages = "595--623", month = may, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00830-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00830-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lee:2024:HMT, author = "Geon Lee and Seokbum Yoon and Jihoon Ko and Hyunju Kim and Kijung Shin", title = "Hypergraph motifs and their extensions beyond binary", journal = j-VLDB-J, volume = "33", number = "3", pages = "625--665", month = may, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00827-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00827-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Liao:2024:SDG, author = "Ningyi Liao and Dingheng Mo and Siqiang Luo and Xiang Li and Pengcheng Yin", title = "Scalable decoupling graph neural network with feature-oriented optimization", journal = j-VLDB-J, volume = "33", number = "3", pages = "667--683", month = may, year = "2024", CODEN = "VLDBFR", DOI = 
"https://doi.org/10.1007/s00778-023-00829-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00829-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Liang:2024:STC, author = "Anqi Liang and Bin Yao and Bo Wang and Yinpei Liu and Zhida Chen and Jiong Xie and Feifei Li", title = "Sub-trajectory clustering with deep reinforcement learning", journal = j-VLDB-J, volume = "33", number = "3", pages = "685--702", month = may, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00833-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00833-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yao:2024:ISB, author = "Kai Yao and Lijun Chang and Jeffrey Xu Yu", title = "Identifying similar-bicliques in bipartite graphs", journal = j-VLDB-J, volume = "33", number = "3", pages = "703--726", month = may, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00834-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00834-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; 
https://link.springer.com/journal/778", } @Article{Xia:2024:TSD, author = "Tianrui Xia and Jinzhao Xiao and Yuxiang Huang and Changyu Hu and Shaoxu Song and Xiangdong Huang and Jianmin Wang", title = "Time series data encoding in {Apache IoTDB}: comparative analysis and recommendation", journal = j-VLDB-J, volume = "33", number = "3", pages = "727--752", month = may, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00840-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00840-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ting:2024:NDT, author = "Kai Ming Ting and Zongyou Liu and Lei Gong and Hang Zhang and Ye Zhu", title = "A new distributional treatment for time series anomaly detection", journal = j-VLDB-J, volume = "33", number = "3", pages = "753--780", month = may, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00832-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00832-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Gong:2024:IAI, author = "Shufeng Gong and Chao Tian and Qiang Yin and Zhengdong Wang and Song Yu and Yanfeng Zhang and Wenyuan Yu and Liang Geng and Chong Fu and Ge Yu and Jingren Zhou", title = "{Ingress}: an automated incremental graph processing system", journal = j-VLDB-J, volume = "33", number = "3", pages = 
"781--806", month = may, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00838-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00838-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Lin:2024:RRE, author = "Hong Lin and Ke Chen and Dawei Jiang and Lidan Shou and Gang Chen", title = "{Refiner}: a reliable and efficient incentive-driven federated learning system powered by blockchain", journal = j-VLDB-J, volume = "33", number = "3", pages = "807--831", month = may, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00839-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib", note = "See correction \cite{Lin:2024:CRR}.", URL = "https://link.springer.com/article/10.1007/s00778-024-00839-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Jiang:2024:HGM, author = "Jiawei Jiang and Yi Wei and Yu Liu and Wentao Wu and Chuang Hu and Zhigao Zheng and Ziyi Zhang and Yingxia Shao and Ce Zhang", title = "How good are machine learning clouds? 
{Benchmarking} two snapshots over 5 years", journal = j-VLDB-J, volume = "33", number = "3", pages = "833--857", month = may, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00842-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00842-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Magalhaes:2024:MDM, author = "Arlino Magalhaes and Angelo Brayner and Jose Maria Monteiro", title = "{MM-DIRECT}: Main memory database instant recovery with tuple consistent checkpoint", journal = j-VLDB-J, volume = "33", number = "3", pages = "859--882", month = may, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00846-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00846-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Jia:2024:HNA, author = "Tong Jia and Ying Li and Yong Yang and Gang Huang", title = "{Hilogx}: noise-aware log-based anomaly detection with human feedback", journal = j-VLDB-J, volume = "33", number = "3", pages = "883--900", month = may, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00843-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Wed Apr 24 13:05:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = 
"https://link.springer.com/article/10.1007/s00778-024-00843-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Boehm:2024:SIM, author = "Matthias Boehm and Nesime Tatbul", title = "Special issue on ``{Machine} learning and databases''", journal = j-VLDB-J, volume = "33", number = "4", pages = "901--901", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00848-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00848-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kara:2024:FIA, author = "Ahmet Kara and Milos Nikolic and Dan Olteanu and Haozhe Zhang", title = "{F-IVM}: analytics over relational databases under updates", journal = j-VLDB-J, volume = "33", number = "4", pages = "903--929", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00817-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00817-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Huang:2024:ERA, author = "Enhui Huang and Yanlei Diao and Anna Liu and Liping Peng and Luciano {Di Palma}", title = "Efficient and robust active learning methods for interactive database exploration", journal = j-VLDB-J, volume = "33", 
number = "4", pages = "931--956", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00816-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00816-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Neutatz:2024:AHC, author = "Felix Neutatz and Marius Lindauer and Ziawasch Abedjan", title = "{AutoML} in heavily constrained applications", journal = j-VLDB-J, volume = "33", number = "4", pages = "957--979", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00820-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00820-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Meduri:2024:AAL, author = "Venkata Vamsikrishna Meduri and Abdul Quamar and Chuan Lei and Xiao Qin and Berthold Reinwald", title = "{Alfa}: active learning for graph neural network-based semantic schema alignment", journal = j-VLDB-J, volume = "33", number = "4", pages = "981--1011", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00822-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00822-z", acknowledgement = ack-nhfb, ajournal = 
"VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Olteanu:2024:GRD, author = "Dan Olteanu and Nils Vortmeier and {\Dbar}or{\dbar}e {\v{Z}}ivanovi{\'c}", title = "{Givens} rotations for {$ Q R $} decomposition, {SVD} and {PCA} over database joins", journal = j-VLDB-J, volume = "33", number = "4", pages = "1013--1037", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00818-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00818-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", keywords = "FIGARO (algorithm for computing the upper-triangular matrix in the $Q R$ decomposition of the matrix)", } @Article{Paganelli:2024:MFA, author = "Matteo Paganelli and Donato Tiano and Francesco Guerra", title = "A multi-facet analysis of {BERT}-based entity matching models", journal = j-VLDB-J, volume = "33", number = "4", pages = "1039--1064", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00824-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00824-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Luo:2024:MPM, author = "Yongping Luo and Peiquan Jin and Zhaole Chu and Xiaoliang Wang and Yigui Yuan and Zhou Zhang and Yun Luo and Xufei Wu and 
Peng Zou", title = "{Morphtree}: a polymorphic main-memory learned index for dynamic workloads", journal = j-VLDB-J, volume = "33", number = "4", pages = "1065--1084", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00823-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00823-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Trummer:2024:DBM, author = "Immanuel Trummer", title = "{DB-BERT}: making database tuning tools ``read'' the manual", journal = j-VLDB-J, volume = "33", number = "4", pages = "1085--1104", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00831-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00831-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Huynh:2024:TFR, author = "Andy Huynh and Harshal A. 
Chaudhari and Evimaria Terzi and Manos Athanassoulis", title = "Towards flexibility and robustness of {LSM} trees", journal = j-VLDB-J, volume = "33", number = "4", pages = "1105--1128", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-023-00826-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-023-00826-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Redyuk:2024:ADD, author = "Sergey Redyuk and Zoi Kaoudi and Sebastian Schelter and Volker Markl", title = "Assisted design of data science pipelines", journal = j-VLDB-J, volume = "33", number = "4", pages = "1129--1153", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00835-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00835-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Vu:2024:LBF, author = "Tin Vu and Alberto Belussi and Sara Migliorini and Ahmed Eldawy", title = "A learning-based framework for spatial join processing: estimation, optimization and tuning", journal = j-VLDB-J, volume = "33", number = "4", pages = "1155--1177", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00836-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00836-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Song:2024:SST, author = "Yuanfeng Song and Raymond Chi-Wing Wong and Xuefang Zhao", title = "{Speech-to-SQL}: toward speech-driven {SQL} query generation from natural language question", journal = j-VLDB-J, volume = "33", number = "4", pages = "1179--1201", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00837-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00837-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Shahbazi:2024:REI, author = "Nima Shahbazi and Abolfazl Asudeh", title = "Reliability evaluation of individual predictions: a data-centric approach", journal = j-VLDB-J, volume = "33", number = "4", pages = "1203--1230", month = jul, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00857-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Aug 5 15:56:54 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00857-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xu:2024:SGD, author = "Lijie Xu and Shuang Qiu and Binhang Yuan and Jiawei Jiang and Cedric Renggli and Shaoduo Gan and 
Kaan Kara and Guoliang Li and Ji Liu and Wentao Wu and Jieping Ye and Ce Zhang", title = "Stochastic gradient descent without full data shuffle: with applications to in-database machine learning and deep learning systems", journal = j-VLDB-J, volume = "33", number = "5", pages = "1231--1255", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00845-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00845-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Karegar:2024:DAI, author = "Reza Karegar and Melicaalsadat Mirsafian and Parke Godfrey and Lukasz Golab and Mehdi Kargar and Divesh Srivastava and Jaroslaw Szlichta", title = "Discovering approximate implicit domain orders through order dependencies", journal = j-VLDB-J, volume = "33", number = "5", pages = "1257--1282", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00847-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00847-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chang:2024:DDT, author = "Jiwon Chang and Bohan Cui and Fatemeh Nargesian and Abolfazl Asudeh and H. V. 
Jagadish", title = "Data distribution tailoring revisited: cost-efficient integration of representative data", journal = j-VLDB-J, volume = "33", number = "5", pages = "1283--1306", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00849-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00849-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Chen:2024:LAL, author = "Xingguang Chen and Rong Zhu and Bolin Ding and Sibo Wang and Jingren Zhou", title = "{Lero}: applying learning-to-rank in query optimizer", journal = j-VLDB-J, volume = "33", number = "5", pages = "1307--1331", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00850-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00850-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Preti:2024:HDO, author = "Giulia Preti and Gianmarco {De Francisci Morales} and Francesco Bonchi", title = "Hyper-distance oracles in hypergraphs", journal = j-VLDB-J, volume = "33", number = "5", pages = "1333--1356", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00851-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", 
URL = "https://link.springer.com/article/10.1007/s00778-024-00851-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Shi:2024:ECE, author = "Gongyu Shi and Geng Wang and Shi-Feng Sun and Dawu Gu", title = "Efficient cryptanalysis of an encrypted database supporting data interoperability", journal = j-VLDB-J, volume = "33", number = "5", pages = "1357--1375", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00852-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00852-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zhang:2024:SDT, author = "Chen Jason Zhang and Yunrui Liu and Pengcheng Zeng and Ting Wu and Lei Chen and Pan Hui and Fei Hao", title = "Similarity-driven and task-driven models for diversity of opinion in crowdsourcing markets", journal = j-VLDB-J, volume = "33", number = "5", pages = "1377--1398", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00853-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00853-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2024:EAR, author = "Kai Wang and Minghao Cai and Xiaoshuang Chen and Xuemin Lin and Wenjie Zhang and 
Lu Qin and Ying Zhang", title = "Efficient algorithms for reachability and path queries on temporal bipartite graphs", journal = j-VLDB-J, volume = "33", number = "5", pages = "1399--1426", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00854-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00854-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xu:2024:EEA, author = "Yichen Xu and Chenhao Ma and Yixiang Fang and Zhifeng Bao", title = "Efficient and effective algorithms for densest subgraph discovery and maintenance", journal = j-VLDB-J, volume = "33", number = "5", pages = "1427--1452", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00855-y", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00855-y", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Wang:2024:PBC, author = "Zhibin Wang and Longbin Lai and Yixue Liu and Bing Shui and Chen Tian and Sheng Zhong", title = "Parallelization of butterfly counting on hierarchical memory", journal = j-VLDB-J, volume = "33", number = "5", pages = "1453--1484", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00856-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", 
bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00856-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Song:2024:SHT, author = "Haoze Song and Wenchao Zhou and Heming Cui and Xiang Peng and Feifei Li", title = "A survey on hybrid transactional and analytical processing", journal = j-VLDB-J, volume = "33", number = "5", pages = "1485--1515", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00858-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00858-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Peng:2024:MMC, author = "Peng Peng and Shengyi Ji and M. 
Tamer {\"O}zsu and Lei Zou", title = "Minimum motif-cut: a workload-aware {RDF} graph partitioning strategy", journal = j-VLDB-J, volume = "33", number = "5", pages = "1517--1542", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00860-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00860-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Xia:2024:GBB, author = "Yifei Xia and Feng Zhang and Qingyu Xu and Mingde Zhang and Zhiming Yao and Lv Lu and Xiaoyong Du and Dong Deng and Bingsheng He and Siqi Ma", title = "{GPU}-based butterfly counting", journal = j-VLDB-J, volume = "33", number = "5", pages = "1543--1567", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00861-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00861-0", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kitsios:2024:FGL, author = "Xenophon Kitsios and Panagiotis Liakos and Katia Papakonstantinopoulou and Yannis Kotidis", title = "Flexible grouping of linear segments for highly accurate lossy compression of time series data", journal = j-VLDB-J, volume = "33", number = "5", pages = "1569--1589", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00862-z", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L 
= "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00862-z", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Pan:2024:SVD, author = "James Jie Pan and Jianguo Wang and Guoliang Li", title = "Survey of vector database management systems", journal = j-VLDB-J, volume = "33", number = "5", pages = "1591--1615", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00864-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00864-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Liang:2024:FSF, author = "Zhiyu Liang and Hongzhi Wang", title = "{FedST}: secure federated shapelet transformation for time series classification", journal = j-VLDB-J, volume = "33", number = "5", pages = "1617--1641", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00865-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00865-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yang:2024:FRC, author = "Yifei Yang and Xiangyao Yu and Marco Serafini and Ashraf Aboulnaga and Michael 
Stonebraker", title = "{FlexpushdownDB}: rethinking computation pushdown for cloud {OLAP DBMSs}", journal = j-VLDB-J, volume = "33", number = "5", pages = "1643--1670", month = sep, year = "2024", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00867-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sun Aug 18 07:21:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00867-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", }

@Article{Neuhof:2024:OBF,
  author =       "Franziska Neuhof and Marco Fisichella and George Papadakis and Konstantinos Nikoletos and Nikolaus Augsten and Wolfgang Nejdl and Manolis Koubarakis",
  title =        "Open benchmark for filtering techniques in entity resolution",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "5",
  pages =        "1671--1696",
  month =        sep,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00868-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Aug 18 07:21:17 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00868-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Liu:2024:WUG,
  author =       "Zirui Liu and Fenghao Dong and Chengwu Liu and Xiangwei Deng and Tong Yang and Yikai Zhao and Jizhou Li and Bin Cui and Gong Zhang",
  title =        "{WavingSketch}: an unbiased and generic sketch for finding top-$k$ items in data streams",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "5",
  pages =        "1697--1722",
  month =        sep,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00869-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Aug 18 07:21:17 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00869-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zhang:2024:FES,
  author =       "Xingyi Zhang and Jinchao Huang and Fangyuan Zhang and Sibo Wang",
  title =        "{FICOM}: an effective and scalable active learning framework for {GNNs} on semi-supervised node classification",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "5",
  pages =        "1723--1742",
  month =        sep,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00870-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Aug 18 07:21:17 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00870-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Wu:2024:AZS,
  author =       "Xinle Wu and Xingjian Wu and Bin Yang and Lekui Zhou and Chenjuan Guo and Xiangfei Qiu and Jilin Hu and Zhenli Sheng and Christian S. Jensen",
  title =        "{AutoCTS++}: zero-shot joint neural architecture and hyperparameter search for correlated time series forecasting",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "5",
  pages =        "1743--1770",
  month =        sep,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00872-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Aug 18 07:21:17 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00872-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Lin:2024:CRR,
  author =       "Hong Lin and Ke Chen and Dawei Jiang and Lidan Shou and Gang Chen",
  title =        "Correction to: {``Refiner: a reliable and efficient incentive-driven federated learning system powered by blockchain''}",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "5",
  pages =        "1771--1771",
  month =        sep,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00866-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Aug 18 07:21:17 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib; https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See \cite{Lin:2024:RRE}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00866-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Volume 33, number 6 (November 2024).

@Article{May:2024:SIM,
  author =       "Norman May and Spyros Blanas and Danica Porobic",
  title =        "Special issue: modern hardware",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "6",
  pages =        "1773--1774",
  month =        nov,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00841-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Oct 22 11:15:12 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00841-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Nicholson:2024:HME,
  author =       "Hamish Nicholson and Periklis Chrysogelos and Anastasia Ailamaki",
  title =        "{HPCache}: memory-efficient {OLAP} through proportional caching revisited",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "6",
  pages =        "1775--1791",
  month =        nov,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00828-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Oct 22 11:15:12 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00828-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Li:2024:PAL,
  author =       "Tianyu Li and Badrish Chandramouli and Samuel Madden",
  title =        "Performant almost-latch-free data structures using epoch protection in more depth",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "6",
  pages =        "1793--1812",
  month =        nov,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00859-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Oct 22 11:15:12 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00859-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}
@Article{Wang:2024:OLB,
  author =       "Ruihong Wang and Chuqing Gao and Jianguo Wang and Prishita Kadam and M. Tamer {\"O}zsu and Walid G. Aref",
  title =        "Optimizing {LSM}-based indexes for disaggregated memory",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "6",
  pages =        "1813--1836",
  month =        nov,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00863-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Oct 22 11:15:12 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00863-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Fan:2024:EST,
  author =       "Zhuochen Fan and Bowen Ye and Ziwei Wang and Zheng Zhong and Jiarui Guo and Yuhan Wu and Haoyu Li and Tong Yang and Yaofeng Tu and Zirui Liu and Bin Cui",
  title =        "Enabling space-time efficient range queries with {REncoder}",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "6",
  pages =        "1837--1859",
  month =        nov,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00873-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Oct 22 11:15:12 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00873-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{He:2024:DCV,
  author =       "Yizhang He and Kai Wang and Wenjie Zhang and Xuemin Lin and Ying Zhang",
  title =        "Discovering critical vertices for reinforcement of large-scale bipartite networks",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "6",
  pages =        "1861--1886",
  month =        nov,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00871-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Oct 22 11:15:12 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00871-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Wang:2024:DDA,
  author =       "Zeyu Wang and Qitong Wang and Peng Wang and Themis Palpanas and Wei Wang",
  title =        "{DumpyOS}: a data-adaptive multi-ary index for scalable data series similarity search",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "6",
  pages =        "1887--1911",
  month =        nov,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00874-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Oct 22 11:15:12 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00874-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Li:2024:VFA,
  author =       "Yiran Li and Gongyao Guo and Jieming Shi and Renchi Yang and Shiqi Shen and Qing Li and Jun Luo",
  title =        "A versatile framework for attributed network clustering via {$K$}-nearest neighbor augmentation",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "6",
  pages =        "1913--1943",
  month =        nov,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00875-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Oct 22 11:15:12 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00875-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% ====================================================================
%%% From v34n1 (January 2025), page ranges are replaced by sequential
%%% article numbers, so entries are now sorted with
%%% ``bibsort --byarticleno''.

@Article{Pan:2025:AMB,
  author =       "Dong Pan and Xu Zhou and Wensheng Luo and Zhibang Yang and Qing Liu and Yunjun Gao and Kenli Li",
  title =        "Accelerating maximum biplex search over large bipartite graphs",
  journal =      j-VLDB-J,
  volume =       "34",
  number =       "1",
  pages =        "??--??",
  month =        jan,
  year =         "2025",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00882-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jan 4 08:20:23 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00882-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  articleno =    "1",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Arroyuelo:2025:ERP,
  author =       "Diego Arroyuelo and Adri{\'a}n G{\'o}mez-Brand{\'o}n and Gonzalo Navarro",
  title =        "Evaluating regular path queries on compressed adjacency matrices",
  journal =      j-VLDB-J,
  volume =       "34",
  number =       "1",
  pages =        "??--??",
  month =        jan,
  year =         "2025",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-024-00885-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jan 4 08:20:23 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-024-00885-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  articleno =    "2",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}
@Article{Hu:2025:PRF, author = "Zheng Hu and Cong Xu and Weiguo Zheng", title = "A powerful reducing framework for accelerating set intersections over graphs", journal = j-VLDB-J, volume = "34", number = "1", pages = "??--??", month = jan, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00881-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 4 08:20:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00881-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "3", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Liu:2025:EOR, author = "Hao Liu and Raymond Chi-Wing Wong", title = "On efficient {3D} object retrieval", journal = j-VLDB-J, volume = "34", number = "1", pages = "??--??", month = jan, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00884-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 4 08:20:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00884-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "4", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Zeakis:2025:DAP, author = "Alexandros Zeakis and George Papadakis and Dimitrios Skoutas and Manolis Koubarakis", title = "An in-depth analysis of pre-trained embeddings for entity resolution", journal = j-VLDB-J, volume = "34", number = "1", pages = "??--??", month = jan, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00879-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 4 08:20:23 MST 2025", bibsource 
= "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00879-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "5", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Hu:2025:GPM, author = "Lin Hu and Yinnian Lin and Lei Zou and M. Tamer {\"O}zsu", title = "A graph pattern mining framework for large graphs on {GPU}", journal = j-VLDB-J, volume = "34", number = "1", pages = "??--??", month = jan, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00883-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 4 08:20:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00883-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "6", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Shin:2025:UIL, author = "Jaewoo Shin and Libin Zhou and Jianguo Wang and Walid G. 
Aref", title = "An update-intensive {LSM}-based {R}-tree index", journal = j-VLDB-J, volume = "34", number = "1", pages = "??--??", month = jan, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00876-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 4 08:20:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00876-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "7", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Georgiadis:2025:RIO, author = "Thanasis Georgiadis and Eleni Tzirita Zacharatou and Nikos Mamoulis", title = "Raster interval object approximations for spatial intersection joins", journal = j-VLDB-J, volume = "34", number = "1", pages = "??--??", month = jan, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00887-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 4 08:20:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00887-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "8", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mondal:2025:SME, author = "Manuel Mondal and Mourad Khayati and H{\^o}ng-{\^A}n Sandlin and Philippe Cudr{\'e}-Mauroux", title = "A survey of multimodal event detection based on data fusion", journal = j-VLDB-J, volume = "34", number = "1", pages = "??--??", month = jan, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00878-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 4 08:20:23 MST 2025", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00878-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "9", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Messaoud:2025:TBK, author = "Aghiles Ait Messaoud and Sonia {Ben Mokhtar} and Anthony Simonet-Boulogne", title = "Tee-based key-value stores: a survey", journal = j-VLDB-J, volume = "34", number = "1", pages = "??--??", month = jan, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00877-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 4 08:20:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00877-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "10", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Ahmed:2025:RTA, author = "Waqas Ahmed and Leticia G{\'o}mez and Alejandro Vaisman and Esteban Zim{\'a}nyi", title = "Reconciling tuple and attribute timestamping for temporal data warehouses", journal = j-VLDB-J, volume = "34", number = "1", pages = "??--??", month = jan, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00889-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 4 08:20:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00889-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "11", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Guo:2025:DQO, author = "Yunyan Guo and Guoliang Li and Ruilin Hu and 
Yong Wang", title = "In-database query optimization on {SQL} with {ML} predicates", journal = j-VLDB-J, volume = "34", number = "1", pages = "??--??", month = jan, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00888-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 4 08:20:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00888-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "12", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mann:2025:STS, author = "Willi Mann and Nikolaus Augsten and Christian S. Jensen and Mateusz Pawlik", title = "{SWOOP}: top-$k$ similarity joins over set streams", journal = j-VLDB-J, volume = "34", number = "1", pages = "??--??", month = jan, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00880-x", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Jan 4 08:20:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00880-x", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "13", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Song:2025:ETS, author = "Yitong Song and Bin Yao and Zhida Chen and Xin Yang and Jiong Xie and Feifei Li and Mengshi Chen", title = "Efficient top-$k$ spatial-range-constrained approximate nearest neighbor search on geo-tagged high-dimensional vectors", journal = j-VLDB-J, volume = "34", number = "1", pages = "??--??", month = jan, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00894-5", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", 
bibdate = "Sat Jan 4 08:20:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00894-5", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "14", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Kim:2025:ESC, author = "Hyunju Kim and Heechan Moon and Fanchen Bu and Jihoon Ko and Kijung Shin", title = "Estimating simplet counts via sampling", journal = j-VLDB-J, volume = "34", number = "2", pages = "??--??", month = mar, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00890-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 1 11:02:25 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00890-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "15", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Mulder:2025:ONG, author = "Thomas Mulder and George Fletcher and Nikolay Yakovets", title = "Optimizing navigational graph queries", journal = j-VLDB-J, volume = "34", number = "2", pages = "??--??", month = mar, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00892-7", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 1 11:02:25 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00892-7", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "16", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Liu:2025:HGM, author = "Qiyu Liu and Maocheng Li and Yuxiang Zeng and Yanyan 
Shen and Lei Chen", title = "How good are multi-dimensional learned indexes? {An} experimental survey", journal = j-VLDB-J, volume = "34", number = "2", pages = "??--??", month = mar, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00893-6", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 1 11:02:25 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00893-6", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "17", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Jiang:2025:EHL, author = "Changkun Jiang and Heze Lao and Chaorui Zhang and Ji Cheng and Chen Jason Zhang and Jianqiang Li", title = "{HeteroStamp}: leveraging heterogeneous social interactions for mobility prediction-enhanced cost-aware spatiotemporal crowdsensing", journal = j-VLDB-J, volume = "34", number = "2", pages = "??--??", month = mar, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00891-8", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 1 11:02:25 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00891-8", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "18", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tong:2025:HFE, author = "Yongxin Tong and Yuxiang Zeng and Yang Song and Xuchen Pan and Zeheng Fan and Chunbo Xue and Zimu Zhou and Xiaofei Zhang and Lei Chen and Yi Xu and Ke Xu and Weifeng Lv", title = "{Hu-Fu}: efficient and secure spatial queries over data federation", journal = j-VLDB-J, volume = "34", number = "2", pages = "??--??", month = mar, year = "2025", CODEN 
= "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00896-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 1 11:02:25 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00896-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "19", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Tao:2025:DPE, author = "Yuchao Tao and Amir Gilad and Ashwin Machanavajjhala and Sudeepa Roy", title = "Differentially private explanations for aggregate query answers", journal = j-VLDB-J, volume = "34", number = "2", pages = "??--??", month = mar, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00895-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 1 11:02:25 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00895-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "20", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Trummer:2025:GHC, author = "Immanuel Trummer", title = "Generating highly customizable {Python} code for data processing with large language models", journal = j-VLDB-J, volume = "34", number = "2", pages = "??--??", month = mar, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-025-00900-4", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 1 11:02:25 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-025-00900-4", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "21", fjournal = "VLDB Journal: Very Large Data 
Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Peng:2025:AAS, author = "Jingshu Peng and Qiyu Liu and Zhao Chen and Yingxia Shao and Yanyan Shen and Lei Chen and Jiannong Cao", title = "From {{\sc Sancus}} to {{\sc Sancus$^q$}}: staleness and quantization-aware full-graph decentralized training in graph neural networks", journal = j-VLDB-J, volume = "34", number = "2", pages = "??--??", month = mar, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00897-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Sat Feb 1 11:02:25 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00897-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "22", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Skavantzos:2025:TBC, author = "Philipp Skavantzos and Sebastian Link", title = "Third and {Boyce--Codd} normal form for property graphs", journal = j-VLDB-J, volume = "34", number = "2", pages = "??--??", month = mar, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-025-00902-2", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 11 08:47:51 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-025-00902-2", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "23", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Helt:2025:CCC, author = "Jeffrey Helt and Abhinav Sharma and Daniel J. Abadi and Wyatt Lloyd and Jose M. 
Faleiro", title = "{C5}: cloned concurrency control that always keeps up", journal = j-VLDB-J, volume = "34", number = "2", pages = "??--??", month = mar, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-025-00901-3", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 11 08:47:51 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-025-00901-3", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "24", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Burckhardt:2025:NEE, author = "Sebastian Burckhardt and Badrish Chandramouli and Chris Gillum and David Justo and Konstantinos Kallas and Connor McMahon and Christopher S. Meiklejohn and Xiangfeng Zhu", title = "{Netherite}: efficient execution of serverless workflows", journal = j-VLDB-J, volume = "34", number = "2", pages = "??--??", month = mar, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-024-00898-1", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 11 08:47:51 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-024-00898-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "25", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Yu:2025:QHC, author = "Yuanhang Yu and Dong Wen and Michael Yu and Lu Qin and Ying Zhang and Wenjie Zhang and Xuemin Lin", title = "Querying historical {$K$}-cores in large temporal graphs", journal = j-VLDB-J, volume = "34", number = "2", pages = "??--??", month = mar, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-025-00903-1", ISSN = "1066-8888 (print), 0949-877X 
(electronic)", ISSN-L = "1066-8888", bibdate = "Fri Apr 11 08:47:51 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-025-00903-1", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "26", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Miao:2025:ESH, author = "Xupeng Miao and Hailin Zhang and Yining Shi and Xiaonan Nie and Zhi Yang and Yangyu Tao and Jie Jiang and Bin Cui", title = "Efficient and scalable huge embedding model training via distributed cache management", journal = j-VLDB-J, volume = "34", number = "3", pages = "??--??", month = may, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-025-00908-w", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 25 09:20:40 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-025-00908-w", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "27", fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778", } @Article{Peng:2025:CAP, author = "Huanhuan Peng and Xiaoye Miao and Jinshan Zhang and Yunjun Gao and Shuiguang Deng and Jianwei Yin", title = "Cost-aware prediction service pricing with incomplete information", journal = j-VLDB-J, volume = "34", number = "3", pages = "??--??", month = may, year = "2025", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-025-00909-9", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Tue Mar 25 09:20:40 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib", URL = "https://link.springer.com/article/10.1007/s00778-025-00909-9", acknowledgement = ack-nhfb, ajournal = "VLDB J.", articleno = "28", fjournal = "VLDB Journal: 
Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zhang:2025:MCO,
  author = "Zhuoxing Zhang and Sebastian Link",
  title = "Mixed covers: optimizing updates and queries using minimal keys and functional dependencies",
  journal = j-VLDB-J,
  volume = "34",
  number = "3",
  pages = "??--??",
  month = may,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00910-2",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Tue Mar 25 09:20:40 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00910-2",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "29",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Liu:2025:DFA,
  author = "Chunwei Liu and Anna Pavlenko and Matteo Interlandi and Brandon Haynes",
  title = "Data formats in analytical {DBMSs}: performance trade-offs and future directions",
  journal = j-VLDB-J,
  volume = "34",
  number = "3",
  pages = "??--??",
  month = may,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00911-1",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Tue Mar 25 09:20:40 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00911-1",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "30",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Li:2025:JOR,
  author = "Rui Li and Zongyan He and Jeffrey Xu Yu",
  title = "Join optimization revisited: a novel {DP} algorithm for join\&sort order selection",
  journal = j-VLDB-J,
  volume = "34",
  number = "3",
  pages = "??--??",
  month = may,
  year = "2025",
  CODEN = "VLDBFR",
DOI = "https://doi.org/10.1007/s00778-025-00906-y",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Fri Apr 11 08:47:52 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00906-y",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "31",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Boniol:2025:VEE,
  author = "Paul Boniol and Ashwin K. Krishna and Marine Bruel and Qinghua Liu and Mingyi Huang and Themis Palpanas and Ruey S. Tsay and Aaron Elmore and Michael J. Franklin and John Paparrizos",
  title = "{VUS}: effective and efficient accuracy measures for time-series anomaly detection",
  journal = j-VLDB-J,
  volume = "34",
  number = "3",
  pages = "??--??",
  month = may,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00907-x",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Fri Apr 11 08:47:52 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00907-x",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "32",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Yin:2025:LLI,
  author = "Ziqi Yin and Shanshan Feng and Shang Liu and Gao Cong and Yew Soon Ong and Bin Cui",
  title = "{LIST}: learning to index spatio-textual data for embedding based spatial keyword queries",
  journal = j-VLDB-J,
  volume = "34",
  number = "3",
  pages = "??--??",
  month = may,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-024-00886-5",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Fri Apr 11 08:47:52 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
URL = "https://link.springer.com/article/10.1007/s00778-024-00886-5",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "33",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Luo:2025:EIS,
  author = "Qi Luo and Wenjie Zhang and Liping Wang",
  title = "Efficient indexing and searching of constrained core in hypergraphs",
  journal = j-VLDB-J,
  volume = "34",
  number = "3",
  pages = "??--??",
  month = may,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00915-x",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:28:56 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00915-x",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "34",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Li:2025:GEG,
  author = "Pengfei Li and Yong Zhang and Hua Lu",
  title = "{GRELA}: Exploiting graph representation learning in effective approximate query processing",
  journal = j-VLDB-J,
  volume = "34",
  number = "3",
  pages = "??--??",
  month = may,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00914-y",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:28:56 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00914-y",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "35",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Ji:2025:TID,
  author = "Daomin Ji and Hui Luo and J.
Shane Culpepper",
  title = "Table integration in data lakes unleashed: pairwise integrability judgment, integrable set discovery, and multi-tuple conflict resolution",
  journal = j-VLDB-J,
  volume = "34",
  number = "3",
  pages = "??--??",
  month = may,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00917-9",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:28:56 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00917-9",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "36",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Li:2025:DTY,
  author = "Qian Li and Peter Kraft and Michael Stonebraker",
  title = "{DBOS}: three years later",
  journal = j-VLDB-J,
  volume = "34",
  number = "3",
  pages = "??--??",
  month = may,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-024-00899-0",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:28:56 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-024-00899-0",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "37",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Volume 34, number 4 (July 2025)

@Article{Li:2025:PPL,
  author = "Shuaimin Li and Xuanang Chen and Lei Chen",
  title = "{prompt4vis}: prompting large language models with example mining for tabular data visualization",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00912-0",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00912-0",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "38",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Budiu:2025:DAI,
  author = "Mihai Budiu and Leonid Ryzhyk and Val Tannen",
  title = "{DBSP}: automatic incremental view maintenance for rich query languages",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00922-y",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00922-y",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "39",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Tang:2025:TPC,
  author = "Dixin Tang and Alan Fekete and Aditya G.
Parameswaran",
  title = "Transactional panorama: a conceptual framework for user perception in analytical visual interfaces (extended version)",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00923-x",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00923-x",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "40",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Nikookar:2025:MRR,
  author = "Sepideh Nikookar and Sohrab Namazi Nia and Behrooz Omidvar-Tehrani",
  title = "Model reusability in Reinforcement Learning",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00920-0",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00920-0",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "41",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Haubenschild:2025:OCA,
  author = "Michael Haubenschild and Viktor Leis",
  title = "{OLTP} in the cloud: architectures, tradeoffs, and cost",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00913-z",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00913-z",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "42",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zhang:2025:HHK,
  author = "Jun Zhang and Jue Wang and Xuejian Gong",
  title = "{HMI}: hierarchical knowledge management for efficient multi-tenant inference in pretrained language models",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00919-7",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00919-7",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "43",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zhang:2025:ECF,
  author = "Zhihao Zhang and Jianpeng Qi and Yanwei Yu",
  title = "Efficiently Counting Four-Node Motifs in Large-Scale Temporal Graphs",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00926-8",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00926-8",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "44",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zhou:2025:TIO,
  author = "Xinjing Zhou and Xiangpeng Hao and Michael Stonebraker",
  title = "Tiered-Indexing: Optimizing Access Methods for Skew",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month
= jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00928-6",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00928-6",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "45",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Gao:2025:EAU,
  author = "Xiangyu Gao and Xingxing Xiao and Jianzhong Li",
  title = "Efficient Algorithms for Uncertain Restricted Skyline Query Processing",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00925-9",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00925-9",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "46",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Li:2025:ATS,
  author = "Peng Li and Yeye He and Surajit Chaudhuri",
  title = "Auto-tables: synthesizing multi-step transformations to relationalize tables without using examples",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00921-z",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00921-z",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "47",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Meilicke:2025:CAB,
  author = "Christian Meilicke and Melisachew Wudage Chekol and Heiner Stuckenschmidt",
  title = "Correction: {Anytime} bottom-up rule learning for large-scale knowledge graph completion",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00918-8",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note = "See \cite{Meilicke:2024:ABR}.",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00918-8",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "48",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
  online-date = "29 May 2025",
}

@Article{Bonifati:2025:TQT,
  author = "Angela Bonifati and Stefania Dumbrava and Dominik Tomaszuk",
  title = "Threshold queries in theory and in the wild",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00916-w",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00916-w",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "49",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Bonte:2025:LSR,
  author = "Pieter Bonte and Christophe Call{\'e} and Riccardo Tommasini",
  title = "Languages and systems for {RDF} stream processing, a survey",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00927-7",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00927-7",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "50",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zhao:2025:ETR,
  author = "Hongyao Zhao and Wei Lu and Xiaoyong Du",
  title = "An Efficient Two-Round Distributed Transaction Processing Approach over Heterogeneous Networks",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00929-5",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00929-5",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "51",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Freire:2025:ESI,
  author = "Juliana Freire and Fatma {\"O}zcan and Xuemin Lin",
  title = "Editorial for Special Issue: {VLDB 2022}",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00930-y",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00930-y",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "52",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Hou:2025:ESG,
  author = "Jiamin Hou and Zhanhao Zhao and Xiaoyong Du",
title = "An efficient and scalable graph database with built-in temporal support",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00932-w",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00932-w",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "53",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Cambria:2025:MGR,
  author = "Francesco Cambria and Francesco Invernici and Stefano Ceri",
  title = "{Mine Graph Rule}: a New {GQL} Operator for Mining Association Rules in Property Graph Databases",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00934-8",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00934-8",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "54",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Graur:2025:SQN,
  author = "Dan Graur and Ingo M{\"u}ller and Gustavo Alonso",
  title = "The Status-Quo in nested data processing for high-energy physics",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00924-w",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00924-w",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "55",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Parciak:2025:MAF,
  author = "Marcel Parciak and Sebastiaan Weytjens and Stijn Vansummeren",
  title = "Measuring approximate functional dependencies: a comparative study",
  journal = j-VLDB-J,
  volume = "34",
  number = "4",
  pages = "??--??",
  month = jul,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00931-x",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:30:45 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00931-x",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "56",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Volume 34, number 5 (September 2025)

@Article{Saha:2025:BIV,
  author = "Arkaprava Saha and Xiangyu Ke and Laks V. S. Lakshmanan",
  title = "Beyond influence: voting theory for opinion maximization",
  journal = j-VLDB-J,
  volume = "34",
  number = "5",
  pages = "??--??",
  month = sep,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00933-9",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:18:01 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00933-9",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "57",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Ayad:2025:TIL,
  author = "Lorraine A. K. Ayad and Grigorios Loukides and Solon P.
Pissis",
  title = "Text indexing for long patterns using locally consistent anchors",
  journal = j-VLDB-J,
  volume = "34",
  number = "5",
  pages = "??--??",
  month = sep,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00935-7",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:18:01 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00935-7",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "58",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Tian:2025:DTD,
  author = "Anxin Tian and Alexander Zhou and Chen Zhang",
  title = "Distributed Truss Decomposition over Large Directed Graphs",
  journal = j-VLDB-J,
  volume = "34",
  number = "5",
  pages = "??--??",
  month = sep,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00938-4",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:18:01 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00938-4",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "59",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Sun:2025:EMS,
  author = "Renjie Sun and Chen Chen and Xuemin Lin",
  title = "Efficient maximum signed biclique and biplex identification in signed bipartite graphs",
  journal = j-VLDB-J,
  volume = "34",
  number = "5",
  pages = "??--??",
  month = sep,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00939-3",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:18:01 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00939-3",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "60",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Li:2025:ROC,
  author = "Jiajia Li and Qiulin An and Xiaofang Zhou",
  title = "Route optimization with collective spatial keywords: a skyline-based approach",
  journal = j-VLDB-J,
  volume = "34",
  number = "5",
  pages = "??--??",
  month = sep,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00940-w",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:18:01 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00940-w",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "61",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zhang:2025:SAQ,
  author = "Yunjia Zhang and Yannis Chronis and Theodoros Rekatsinas",
  title = "Simple Adaptive Query Processing vs.
Learned Query Optimizers: Observations and Analysis",
  journal = j-VLDB-J,
  volume = "34",
  number = "5",
  pages = "??--??",
  month = sep,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00936-6",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Sep 25 15:18:01 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00936-6",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "62",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

%%% Volume 34, number 6 (November 2025)

@Article{Zhang:2025:DDL,
  author = "Qi Zhang and Rong-Hua Li and Guoren Wang",
  title = "Density decomposition on large static and dynamic graphs: algorithms and applications",
  journal = j-VLDB-J,
  volume = "34",
  number = "6",
  pages = "??--??",
  month = nov,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00942-8",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Oct 2 11:13:44 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00942-8",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "63",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Peng:2025:GSS,
  author = "Jinfeng Peng and Hanghai Cui and Ge Yu",
  title = "{GARF$^+$}: self-supervised and interpretable data cleaning with sequence generative adversarial networks",
  journal = j-VLDB-J,
  volume = "34",
  number = "6",
  pages = "??--??",
  month = nov,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00941-9",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Oct 2 11:13:44 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00941-9",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "64",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Oulefki:2025:BMB,
  author = "Samira Oulefki and Lamia Berkani and Aicha Mokhtari",
  title = "{BioGITOM}: Matching Biomedical Ontologies with Graph Isomorphism Transformer",
  journal = j-VLDB-J,
  volume = "34",
  number = "6",
  pages = "??--??",
  month = nov,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00943-7",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Oct 2 11:13:44 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00943-7",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "65",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Kang:2025:PTS,
  author = "Hongbo Kang and Yiwei Zhao and Phillip B.
Gibbons",
  title = "{PIM-tree}: a Skew-resistant Index for Processing-in-Memory",
  journal = j-VLDB-J,
  volume = "34",
  number = "6",
  pages = "??--??",
  month = nov,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00937-5",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Oct 2 11:13:44 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00937-5",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "66",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Arroyuelo:2025:CST,
  author = "Diego Arroyuelo and Daniela Campos and Domagoj Vrgo{\v{c}}",
  title = "{CompactLTJ}: Space \& Time Efficient Leapfrog Triejoin on Graph Databases",
  journal = j-VLDB-J,
  volume = "34",
  number = "6",
  pages = "??--??",
  month = nov,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00945-5",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Oct 2 11:13:44 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00945-5",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "67",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Zhong:2025:FLD,
  author = "Kai Zhong and Luming Sun and Hong Chen",
  title = "{FOSS}: a learned doctor for query optimization",
  journal = j-VLDB-J,
  volume = "34",
  number = "6",
  pages = "??--??",
  month = nov,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00947-3",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Thu Oct 2 11:13:44 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00947-3",
acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "68",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Dan:2025:DEH,
  author = "Tangpeng Dan and Xiao Pan and Xiaofeng Meng",
  title = "{DHL}: an efficient hierarchical index for shortest distance querying in time-dependent road networks",
  journal = j-VLDB-J,
  volume = "34",
  number = "6",
  pages = "??--??",
  month = nov,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00946-4",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Wed Dec 24 08:14:39 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00946-4",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "69",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Yang:2025:PUB,
  author = "Yi Yang and Yurong Cheng and Yongjiao Sun",
  title = "Privacy-Utility Balanced Cooperative Online Matching in Spatial Crowdsourcing",
  journal = j-VLDB-J,
  volume = "34",
  number = "6",
  pages = "??--??",
  month = nov,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00944-6",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Wed Dec 24 08:14:39 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00944-6",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "70",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Lu:2025:CQP,
  author = "Pengkai Lu and Zhongle Xie and Lidan Shou",
  title = "Cohort query processing without misleading aging effects",
  journal = j-VLDB-J,
  volume = "34",
  number = "6",
  pages = "??--??",
  month = nov,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00948-2",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Wed Dec 24 08:14:39 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00948-2",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "71",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}

@Article{Sylligardos:2025:MDD,
  author = "Emmanouil Sylligardos and John Paparrizos and Paul Boniol",
  title = "{MSAD}: A deep dive into model selection for time series anomaly detection",
  journal = j-VLDB-J,
  volume = "34",
  number = "6",
  pages = "??--??",
  month = nov,
  year = "2025",
  CODEN = "VLDBFR",
  DOI = "https://doi.org/10.1007/s00778-025-00949-1",
  ISSN = "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L = "1066-8888",
  bibdate = "Wed Dec 24 08:14:39 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL = "https://link.springer.com/article/10.1007/s00778-025-00949-1",
  acknowledgement = ack-nhfb,
  ajournal = "VLDB J.",
  articleno = "72",
  fjournal = "VLDB Journal: Very Large Data Bases",
  journal-URL = "https://dl.acm.org/loi/vldb; https://link.springer.com/journal/778",
}