%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "2.101", %%% date = "17 March 2026", %%% time = "14:54:35 MDT", %%% filename = "tods.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% URL = "https://www.math.utah.edu/~beebe", %%% checksum = "58207 53655 286590 2825007", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "bibliography; BibTeX; database systems; %%% TODS", %%% license = "public domain", %%% supported = "yes", %%% docstring = "This is a COMPLETE BibTeX bibliography for %%% ACM Transactions on Database Systems (TODS) %%% (CODEN ATDSD3, ISSN 0362-5915 (print), %%% 1557-4644 (electronic)), which began %%% publishing in March 1976. %%% %%% The companion bibliography pods.bib covers %%% the ACM SIGACT-SIGMOD Symposia on %%% Principles of Database Systems, and the %%% companion bibliography vldb.bib covers the %%% International Conferences on Very Large %%% Data Bases. The companion bibliography %%% sigmod.bib covers the ACM Special Interest %%% Group on Management of Data SIGMOD Record %%% newsletter. 
%%%
%%% The journal has a World Wide Web site at
%%%
%%%     http://www.acm.org/tods/
%%%     http://www.acm.org/pubs/contents/journals/tods/
%%%     http://portal.acm.org/browse_dl.cfm?idx=J777
%%%
%%% At version 2.101, the year coverage looked
%%% like this:
%%%
%%%     1975 ( 1)    1993 ( 20)    2011 ( 27)
%%%     1976 ( 20)   1994 ( 17)   2012 ( 32)
%%%     1977 ( 23)   1995 ( 13)   2013 ( 28)
%%%     1978 ( 21)   1996 ( 14)   2014 ( 35)
%%%     1979 ( 27)   1997 ( 14)   2015 ( 21)
%%%     1980 ( 25)   1998 ( 14)   2016 ( 33)
%%%     1981 ( 30)   1999 ( 13)   2017 ( 26)
%%%     1982 ( 31)   2000 ( 12)   2018 ( 18)
%%%     1983 ( 30)   2001 ( 12)   2019 ( 16)
%%%     1984 ( 32)   2002 ( 11)   2020 ( 20)
%%%     1985 ( 26)   2003 ( 15)   2021 ( 16)
%%%     1986 ( 25)   2004 ( 22)   2022 ( 16)
%%%     1987 ( 25)   2005 ( 29)   2023 ( 11)
%%%     1988 ( 18)   2006 ( 38)   2024 ( 16)
%%%     1989 ( 23)   2007 ( 30)   2025 ( 17)
%%%     1990 ( 22)   2008 ( 31)   2026 ( 12)
%%%     1991 ( 24)   2009 ( 25)
%%%     1992 ( 20)   2010 ( 29)
%%%     19xx ( 2)
%%%
%%%     Article:        1119
%%%     Book:              3
%%%     InProceedings:     1
%%%     Proceedings:       5
%%%
%%%     Total entries:  1128
%%%
%%% This bibliography was initially built from
%%% searches in the OCLC Content1st database.
%%% Additions were then made from all of the
%%% bibliographies in the TeX User Group
%%% collection, from bibliographies in the
%%% author's personal files, from the IEEE
%%% INSPEC CD-ROM database (1989--1995), from
%%% the Compendex database, from the American
%%% Mathematical Society MathSciNet database,
%%% and from the computer science bibliography
%%% collection on ftp.ira.uka.de in
%%% /pub/bibliography to which many people
%%% have contributed. The snapshot of this
%%% collection was taken on 5-May-1994, and it
%%% consists of 441 BibTeX files, 2,672,675
%%% lines, 205,289 entries, and 6,375
%%% String{} abbreviations, occupying
%%% 94.8MB of disk space. Missing data in many
%%% entries were supplied after consulting
%%% original journal issues.
%%%
%%% Numerous errors in the sources noted above
%%% have been corrected.
Spelling has been %%% verified with the UNIX spell and GNU ispell %%% programs using the exception dictionary %%% stored in the companion file with extension %%% .sok. %%% %%% BibTeX citation tags are uniformly chosen as %%% name:year:abbrev, where name is the family %%% name of the first author or editor, year is a %%% 4-digit number, and abbrev is a 3-letter %%% condensation of important title words. %%% Citation labels were automatically generated %%% by software developed for the BibNet Project. %%% %%% In this bibliography, entries are sorted in %%% publication order, with the help of %%% ``bibsort -byvolume''. The bibsort utility %%% is available from ftp.math.utah.edu in %%% /pub/tex/bib. %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. This is produced by Robert %%% Solovay's checksum utility.", %%% } %%% ==================================================================== @Preamble{ "\hyphenation{ }" } %%% ==================================================================== %%% Acknowledgement abbreviations: @String{ack-nhfb = "Nelson H. F. 
Beebe, University of Utah, Department of Mathematics, 110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA, Tel: +1 801 581 5254, e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|, \path|beebe@computer.org| (Internet), URL: \path|https://www.math.utah.edu/~beebe/|"} %%% ==================================================================== %%% Journal abbreviations: @String{j-BIT = "BIT"} @String{j-CACM = "Communications of the ACM"} @String{j-TODS = "ACM Transactions on Database Systems"} %%% ==================================================================== %%% Publishers and their addresses: @String{pub-ACM = "ACM Press"} @String{pub-ACM:adr = "New York, NY 10036, USA"} @String{pub-IEEE = "IEEE Computer Society Press"} @String{pub-IEEE:adr = "1109 Spring Street, Suite 300, Silver Spring, MD 20910, USA"} @String{pub-MORGAN-KAUFMANN = "Morgan Kaufmann Publishers"} @String{pub-MORGAN-KAUFMANN:adr = "Los Altos, CA 94022, USA"} %%% ==================================================================== %%% Bibliography entries: @Article{Yao:1977:ABA, author = "S. B. Yao", title = "Approximating Block Accesses in Database Organization", journal = j-CACM, volume = "20", number = "4", pages = "260--261", month = apr, year = "1977", CODEN = "CACMA2", ISSN = "0001-0782 (print), 1557-7317 (electronic)", bibdate = "Tue Sep 20 23:14:33 1994", bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib; ftp://ftp.ira.uka.de/pub/bibliography/Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in \cite{Yao:1977:ABM}.", ajournal = "Commun. ACM", fjournal = "Communications of the ACM", journal-URL = "https://dl.acm.org/loi/tods", keywords = "selectivity estimation I/O cost query optimization CACM", } @Article{Hsiao:1976:ATD, author = "David K. 
Hsiao", title = "{ACM Transactions on Database Systems}: aim and scope", journal = j-TODS, volume = "1", number = "1", pages = "1--2", year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-1/p1-hsiao/p1-hsiao.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-1/p1-hsiao/", abstract = "Record-keeping and decision-making in industry and government are increasingly based on data stored in computer processable databases. Thus the need for improved computer technology for building, managing, and using these databases is clearly evident. This need is particularly acute in a complex society where the interrelationships among various aspects of the society must be identified and represented. The data which must be used to represent these relationships are growing more complex in nature and becoming greater in size. Furthermore, the increasing on-line use of computer systems and the proliferation and mass introduction of multilevel secondary storage suggests that future computer systems will be primarily oriented toward database management. The large size of future on-line databases will require the computer system to manage local as well as physical resources. The management of logical resources is concerned with the organization, access, update, storage, and sharing of the data and programs in the database. In addition, the sharing of data means that the database system must be capable of providing privacy protection and of controlling access to the users' data. 
The term {\em data\/} is interpreted broadly to include textual, numeric, and signal data as well as data found in structured records.\par The aim of {\em ACM Transactions on Database Systems\/} (TODS) is to serve as a focal point for an integrated dissemination of database research and development on storage and processor hardware, system software, applications, information science, information analysis, and file management. These areas are particularly relevant to the following ACM Special Interest Groups: Business Data Processing (SIGBDP), Information Retrieval (SIGIR), and Management of Data (SIGMOD). TODS will also embrace parts of the Management/Database Systems and the Information Retrieval and Language Processing sections of {\em Communications of the ACM}.\par High quality papers on all aspects of computer database systems will be published in TODS. The scope of TODS emphasizes data structures; storage organization; data collection and dissemination; search and retrieval strategies; update strategies; access control techniques; data integrity; security and protection; design and implementation of database software; database related languages including data description languages, query languages, and procedural and nonprocedural data manipulation languages; language processing; analysis and classification of data; database utilities; data translation techniques; distributed database problems and techniques; database recovery and restart; database restructuring; adaptive data structures; concurrent access techniques; database computer hardware architecture; performance and evaluation; intelligent front ends; and related subjects such as privacy and economic issues.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", subject = "Information Systems --- Database Management --- Systems (H.2.4)", } @Article{Chen:1976:ERM, author = "Peter Pin-Shan S. Chen", title = "The Entity-Relationship Model: Toward a Unified View of Data", journal = j-TODS, volume = "1", number = "1", pages = "9--36", month = mar, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compiler/prog.lang.theory.bib; Database/Graefe.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib; Object/Nierstrasz.bib", note = "Reprinted in \cite{Stonebraker:1988:RDS}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-1/p9-chen/p9-chen.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-1/p9-chen/", abstract = "A data model, called the entity-relationship model, is proposed. This model incorporates some of the important semantic information about the real world. A special diagrammatic technique is introduced as a tool for database design. An example of database design and description using the model and the diagrammatic technique is given. Some implications for data integrity, information retrieval, and data manipulation are discussed.\par The entity-relationship model can be used as a basis for unification of different views of data: the network model, the relational model, and the entity set model. Semantic ambiguities in these models are analyzed. Possible ways to derive their views of data from the entity-relationship model are presented.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Data Base Task Group; data definition and manipulation; data integrity and consistency; data models; database design; dblit; entity set model; entity-relationship; entity-relationship model; logical view of data; network model; relational model; semantics of data; TODS", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Bayer:1976:EST, author = "R. Bayer and J. K. Metzger", title = "On the Encipherment of Search Trees and Random Access Files", journal = j-TODS, volume = "1", number = "1", pages = "37--52", month = mar, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in \cite[p.~508--510]{Kerr:1975:PIC}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-1/p37-bayer/p37-bayer.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-1/p37-bayer/", abstract = "The securing of information in indexed, random access files by means of privacy transformations must be considered as a problem distinct from that for sequential files. Not only must processing overhead due to encrypting be considered, but also threats to encipherment arising from updating and the file structure itself must be countered. A general encipherment scheme is proposed for files maintained in a paged structure in secondary storage. This is applied to the encipherment of indexes organized as $B$-trees; a $B$-tree is a particular type of multiway search tree. Threats to the encipherment of $B$-trees, especially relating to updating, are examined, and countermeasures are proposed for each. 
In addition, the effect of encipherment on file access and update, on paging mechanisms, and on files related to the enciphered index are discussed. Many of the concepts presented may be readily transferred to other forms of multiway index trees and to binary search trees.", acknowledgement = ack-nhfb, annote = "Trees versus hashing as his 1974 IFIP paper?", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "", subject = "Software --- Operating Systems --- Security and Protection (D.4.6): {\bf Access controls}; Software --- Operating Systems --- Security and Protection (D.4.6): {\bf Cryptographic controls}", } @Article{Lin:1976:DRA, author = "Chyuan Shiun Lin and Diane C. P. Smith and John Miles Smith", title = "The design of a rotating associative memory for relational database applications", journal = j-TODS, volume = "1", number = "1", pages = "53--65", year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-1/p53-lin/p53-lin.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-1/p53-lin/", abstract = "The design and motivation for a rotating associative relational store (RARES) is described. RARES is designed to enhance the performance of an optimizing relational query interface by supporting important high level optimization techniques. In particular, it can perform tuple selection operations at the storage device and also can provide a mechanism for efficient sorting. Like other designs for rotating associative stores, RARES contains search logic which is attached to the heads of a rotating head-per-track storage device. 
RARES is distinct from other designs in that it utilizes a novel ``orthogonal'' storage layout. This layout allows a high output rate of selected tuples even when a sort order in the stored relation must be preserved. As in certain other designs, RARES can usually output a tuple as soon as it is found to satisfy the selection criteria. However, relative to these designs, the orthogonal layout allows an order of magnitude reduction in the capacity of storage local to the search logic.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "associative memory; content addressability; data organization; head-per-track disks; memory systems; relational database; rotating devices; search logic; sorting technique", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Mahmoud:1976:OAR, author = "Samy Mahmoud and J. S. Riordon", title = "Optimal Allocation of Resources in Distributed Information Networks", journal = j-TODS, volume = "1", number = "1", pages = "66--78", month = mar, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-1/p66-mahmoud/p66-mahmoud.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-1/p66-mahmoud/", abstract = "The problems of file allocation and capacity assignment in a fixed topology distributed computer network are examined. These two aspects of the design are tightly coupled by means of an average message delay constraint. 
The objective is to allocate copies of information files to network nodes and capacities to network links so that a minimum cost is achieved subject to network delay and file availability constraints. A model for solving the problem is formulated and the resulting optimization problem is shown to fall into a class of nonlinear integer programming problems. Deterministic techniques for solving this class of problems are computationally cumbersome, even for small size problems. A new heuristic algorithm is developed, which is based on a decomposition technique that greatly reduces the computational complexity of the problem. Numerical results for a variety of network configurations indicate that the heuristic algorithm, while not theoretically convergent, yields practicable low cost solutions with substantial savings in computer processing time and storage requirements. Moreover, it is shown that this algorithm is capable of solving realistic network problems whose solutions using deterministic techniques are computationally intractable.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data files; distributed computed; information networks; link capacities; resource sharing", subject = "Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2)", } @Article{Stemple:1976:DMF, author = "David W. 
Stemple", title = "A Database Management Facility for Automatic Generation of Database Managers", journal = j-TODS, volume = "1", number = "1", pages = "79--94", month = mar, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in \cite[p.~252]{Kerr:1975:PIC}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-1/p79-stemple/p79-stemple.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-1/p79-stemple/", abstract = "A facility is described for the implementation of database management systems having high degrees of {\em horizontal\/} data independence, i.e. independence from chosen logical properties of a database as opposed to {\em vertical\/} independence from storage structures. The facility consists of a high level language for the specification of virtual database managers, a compiler from this language to a pseudomachine language, and an interpreter for the pseudomachine language.\par It is shown how this facility can be used to produce efficient database management systems with any degree of both horizontal and vertical data independence. Two key features of this tool are the compilation of tailored database managers from individual schemas and multiple levels of optional binding.", acknowledgement = ack-nhfb, annote = "Describes SLUSH and SLIM, a proposed compiler and interpreter to operate on network schemas with adjustable binding times.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data independence; database management systems", subject = "Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management (H.2); Software --- Operating Systems --- Systems Programs and Utilities (D.4.9): {\bf make}", } @Article{Astrahan:1976:SRR, author = "M. M. Astrahan and M. W. Blasgen and D. D. Chamberlin and K. P. Eswaran and J. N. Gray and P. P. Griffiths and W. F. King and R. A. Lorie and P. R. McJones and J. W. Mehl and G. R. Putzolu and I. L. Traiger and B. W. Wade and V. Watson", title = "{System R}: a Relational Approach to Database Management", journal = j-TODS, volume = "1", number = "2", pages = "97--137", month = jun, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Object/Nierstrasz.bib", note = "Also published in/as: IBM, San Jose, Research Report. No. RJ-1738, Feb. 1976. Reprinted in \cite{Stonebraker:1988:RDS}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-2/p97-astrahan/p97-astrahan.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-2/p97-astrahan/", abstract = "System R is a database management system which provides a high level relational data interface. The systems provides a high level of data independence by isolating the end user as much as possible from underlying storage structures. The system permits definition of a variety of relational views on common underlying data. 
Data control features are provided, including authorization, integrity assertions, triggered transactions, a logging and recovery subsystem, and facilities for maintaining data consistency in a shared-update environment.\par This paper contains a description of the overall architecture and design of the system. At the present time the system is being implemented and the design evaluated. We emphasize that System R is a vehicle for research in database architecture, and is not planned as a product.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "authorization; data structures; database; dblit; index structures; locking; nonprocedural language; recovery; relational model; TODS relation database IBM San Jose", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf System R}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management (H.2)", } @Article{Navathe:1976:RLD, author = "Shamkant B. Navathe and James P. 
Fry", title = "Restructuring for Large Data Bases: Three Levels of Abstraction", journal = j-TODS, volume = "1", number = "2", pages = "138--158", month = mar, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in \cite[p.~174]{Kerr:1975:PIC}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-2/p138-navathe/p138-navathe.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-2/p138-navathe/", abstract = "The development of a powerful restructuring function involves two important components--the unambiguous specification of the restructuring operations and the realization of these operations in a software system. This paper is directed to the first component in the belief that a precise specification will provide a firm foundation for the development of restructuring algorithms and, subsequently, their implementation. The paper completely defines the semantics of the restructuring of tree structured databases.\par The delineation of the restructuring function is accomplished by formulating three different levels of abstraction, with each level of abstraction representing successively more detailed semantics of the function.\par At the first level of abstraction, the schema modification, three types are identified--naming, combining, and relating; these three types are further divided into eight schema operations. The second level of abstraction, the instance operations, constitutes the transformations on the data instances; they are divided into group operations such as replication, factoring, union, etc., and group relation operations such as collapsing, refinement, fusion, etc. 
The final level, the item value operations, includes the actual item operations, such as copy value, delete value, or create a null value.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data definition; data translation; database; database management systems; logical restructuring", subject = "Information Systems --- Database Management (H.2); Information Systems --- Database Management --- Heterogeneous Databases (H.2.5): {\bf Data translation**}", } @Article{Yao:1976:DDR, author = "S. B. Yao and K. S. Das and T. J. Teorey", title = "A Dynamic Database Reorganization Algorithm", journal = j-TODS, volume = "1", number = "2", pages = "159--174", month = jun, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: Purdue Un., TR-168, Nov. 1975.", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-2/p159-yao/p159-yao.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-2/p159-yao/", abstract = "Reorganization is necessary in some databases for overcoming the performance deterioration caused by updates. The paper presents a dynamic reorganization algorithm which makes the reorganization decision by measuring the database search costs. Previously, the reorganization intervals could only be determined for linear deterioration and known database lifetime. It is shown that the dynamic reorganization algorithm is near optimum for constant reorganization cost and is superior for increasing reorganization cost. In addition, it can be applied to cases of unknown database lifetime and nonlinear performance deterioration. 
The simplicity, generality, and efficiency appear to make this good heuristic for database reorganization.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database; file organization; information retrieval; reorganization", subject = "Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Retrieval models}", } @Article{Burkhard:1976:HTA, author = "Walter A. Burkhard", title = "Hashing and Trie Algorithms for Partial-Match Retrieval", journal = j-TODS, volume = "1", number = "2", pages = "175--187", month = jun, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Wiederhold.bib; Graphics/siggraph/76.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: UCSD, Appl. Physics and Inf. Sc, CS TR.2, Jun. 1975.", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-2/p175-burkhard/p175-burkhard.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-2/p175-burkhard/", abstract = "File designs suitable for retrieval from a file of $k$-letter words when queries may be only partially specified are examined. A new class of partial match file designs (called PMF designs) based upon hash coding and trie search algorithms which provide good worst-case performance is introduced. Upper bounds on the worst-case performance of these designs are given along with examples of files achieving the bound. Other instances of PMF designs are known to have better worst-case performances. The implementation of the file designs with associated retrieval algorithms is considered. 
The amount of storage required is essentially that required of the records themselves.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; analysis; associative retrieval; hash coding; partial match; retrieval; searching; trie search", oldlabel = "geom-96", subject = "Mathematics of Computing --- Mathematical Software (G.4): {\bf Algorithm design and analysis}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Retrieval models}", } @Article{Stonebraker:1976:DII, author = "Michael Stonebraker and Eugene Wong and Peter Kreps and Gerald Held", title = "The Design and Implementation of {INGRES}", journal = j-TODS, volume = "1", number = "3", pages = "189--222", month = sep, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", note = "Reprinted in \cite{Stonebraker:1988:RDS}. Also published in/as: UCB, Elec. Res. Lab, Memo No. ERL-M577, Jan. 1976.", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-3/p189-stonebraker/p189-stonebraker.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-3/p189-stonebraker/", abstract = "The currently operational (March 1976) version of the INGRES database management system is described. This multiuser system gives a relational view of data, supports two high level nonprocedural data sublanguages, and runs as a collection of user processes on top of the UNIX operating system for Digital Equipment Corporation PDP 11/40, 11/45, and 11/70 computers. 
Emphasis is on the design decisions and tradeoffs related to (1) structuring the system into processes, (2) embedding one command language in a general purpose programming language, (3) the algorithms implemented to process interactions, (4) the access methods implemented, (5) the concurrency and recovery control currently provided, and (6) the data structures used for system catalogs and the role of the database administrator.\par Also discussed are (1) support for integrity constraints (which is only partly operational), (2) the not yet supported features concerning views and protection, and (3) future plans concerning the system.", acknowledgement = ack-nhfb, annote = "Describes implementation of INGRES, a non-distributed relational database system. This paper is useful for understanding the distributed INGRES paper.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency; data integrity; data organization; data sublanguage; database optimization; nonprocedural language; protection; QUEL EQUEL query modification process structure Halloween problem TODS; query decomposition; query language; relational database", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Languages (H.2.3); Information Systems --- Database Management --- General (H.2.0): {\bf Security, integrity, and protection**}", } @Article{Wong:1976:DSQ, author = "Eugene Wong and Karel Youssefi", title = "Decomposition --- {A} Strategy for Query Processing", journal = j-TODS, volume = "1", number = "3", pages = "223--241", month = sep, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; 
https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: UCB, Elec. Res. Lab, Memo No. ERL-574, Jan. 1976", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-3/p223-wong/p223-wong.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-3/p223-wong/", abstract = "Strategy for processing multivariable queries in the database management system INGRES is considered. The general procedure is to decompose the query into a sequence of one-variable queries by alternating between (a) reduction: breaking off components of the query which are joined to it by a single variable, and (b) tuple substitution: substituting for one of the variables a tuple at a time. Algorithms for reduction and for choosing the variable to be substituted are given. In most cases the latter decision depends on estimation of costs; heuristic procedures for making such estimates are outlined.", acknowledgement = ack-nhfb, annote = "INGRES query decomposition by reduction to single variable queries, and tuple substitution --- choosing a variable and substituting for it from all tuples, generating a family of queries in one fewer variable.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "connected query; decomposition; detachment; Ingres TODS; irreducible query; joining (overlapping) variable; query processing; relational database; tuple substitution; variable selection", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Griffiths:1976:AMR, author = "Patricia P. Griffiths and Bradford W. 
Wade", title = "An Authorization Mechanism for a Relational Database System", journal = j-TODS, volume = "1", number = "3", pages = "242--255", month = sep, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-3/p242-griffiths/p242-griffiths.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-3/p242-griffiths/", abstract = "A multiuser database system must selectively permit users to share data, while retaining the ability to restrict data access. There must be a mechanism to provide protection and security, permitting information to be accessed only by properly authorized users. Further, when tables or restricted views of tables are created and destroyed dynamically, the granting, authentication, and revocation of authorization to use them must also be dynamic. Each of these issues and their solutions in the context of the relational database management system System R are discussed. \par When a database user creates a table, he is fully and solely authorized to perform upon it actions such as read, insert, update, and delete. He may explicitly grant to any other user any or all of his privileges on the table. In addition he may specify that that user is authorized to further grant these privileges to still other users. The result is a directed graph of granted privileges originating from the table creator.\par At some later time a user A may revoke some or all of the privileges which he previously granted to another user B. This action usually revokes the entire subgraph of the grants originating from A's grant to B. It may be, however, that B will still possess the revoked privileges by means of a grant from another user C, and therefore some or all of B's grants should not be revoked. 
This problem is discussed in detail, and an algorithm for detecting exactly which of B's grants should be revoked is presented.", acknowledgement = ack-nhfb, annote = "Defines a dynamic authorization mechanism. A database user can grant or revoke privileges (such as to read, insert, or delete) on a file that he has created. Furthermore, he can authorize others to grant these same privileges. The database management system keeps track of a directed graph, emanating from the creator of granted privileges.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "access control; authorization; data dependent authorization; database systems; privacy; protection in databases; revocation of authorization; security", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management --- General (H.2.0): {\bf Security, integrity, and protection**}", } @Article{Severance:1976:DFT, author = "Dennis G. Severance and Guy M. 
Lohman", title = "Differential Files: Their Application to the Maintenance of Large Databases", journal = j-TODS, volume = "1", number = "3", pages = "256--267", month = sep, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-3/p256-severance/p256-severance.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-3/p256-severance/", abstract = "The representation of a collection of data in terms of its differences from some preestablished point of reference is a basic storage compaction technique which finds wide applicability. This paper describes a differential database representation which is shown to be an efficient method for storing large and volatile databases. The technique confines database modifications to a relatively small area of physical storage and as a result offers two significant operational advantages. First, because the ``reference point'' for the database is inherently static, it can be simply and efficiently stored. Second, since all modifications to the database are physically localized, the process of backup and the process of recovery are relatively fast and inexpensive.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "backup and recovery; data sharing; database maintenance; differential files", subject = "Information Systems --- Database Management (H.2); Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}", } @Article{Shneiderman:1976:BSS, author = "Ben Shneiderman and Victor Goodman", title = "Batched Searching of Sequential and Tree Structured Files", journal = j-TODS, volume = "1", number = "3", pages = "268--275", month = sep, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See comments in \cite{Piwowarski:1985:CBS}. Also published in/as: Indiana Un., CSD Tech. Ref. 0132.", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-3/p268-shneiderman/p268-shneiderman.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-3/p268-shneiderman/", abstract = "The technique of batching searches has been ignored in the context of disk based online data retrieval systems. This paper suggests that batching be reconsidered for such systems since the potential reduction in processor demand may actually reduce response time. An analysis with sample numerical results and algorithms is presented.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "", subject = "Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Information Storage and Retrieval (H.3)", } @Article{Bernstein:1976:STN, author = "Philip A. 
Bernstein", title = "Synthesizing Third Normal Form Relations from Functional Dependencies", journal = j-TODS, volume = "1", number = "4", pages = "277--298", month = dec, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-4/p277-bernstein/p277-bernstein.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-4/p277-bernstein/", abstract = "It has been proposed that the description of a relational database can be formulated as a set of functional relationships among database attributes. These functional relationships can then be used to synthesize algorithmically a relational scheme. It is the purpose of this paper to present an effective procedure for performing such a synthesis. The schema that results from this procedure is proved to be in Codd's third normal form and to contain the fewest possible number of relations. Problems with earlier attempts to construct such a procedure are also discussed.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database schema; functional dependency; relational model; semantics of data; third normal form", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Normal forms}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}", } @Article{Liu:1976:APS, author = "Jane W. S. 
Liu", title = "Algorithms for parsing search queries in systems with inverted file organization", journal = j-TODS, volume = "1", number = "4", pages = "299--316", month = dec, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-4/p299-liu/p299-liu.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-4/p299-liu/", abstract = "In an inverted file system a query is in the form of a Boolean expression of index terms. In response to a query the system accesses the inverted lists corresponding to the index terms, merges them, and selects from the merged list those records that satisfy the search logic. Considered in this paper is the problem of determining a Boolean expression which leads to the minimum total merge time among all Boolean expressions that are equivalent to the expression given in the query. This problem is the same as finding an optimal merge tree among all trees that realize the truth function determined by the Boolean expression in the query. Several algorithms are described which generate optimal merge trees when the sizes of overlaps between different lists are small compared with the length of the lists.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "inverted file systems; merge algorithms; parsing Boolean queries", subject = "Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Mathematics of Computing --- Mathematical Software (G.4): {\bf Algorithm design and analysis}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Sherman:1976:PDM, author = "Stephen W. Sherman and Richard S. Brice", title = "Performance of a Database Manager in a Virtual Memory System", journal = j-TODS, volume = "1", number = "4", pages = "317--343", month = dec, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-4/p317-sherman/p317-sherman.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-4/p317-sherman/", abstract = "Buffer space is created and managed in database systems in order to reduce accesses to the I/O devices for database information. In systems using virtual memory any increase in the buffer space may be accompanied by an increase in paging. The effects of these factors on system performance are quantified where system performance is a function of page faults and database accesses to I/O devices. This phenomenon is examined through the analysis of empirical data gathered in a multifactor experiment. The factors considered are memory size, size of buffer space, memory replacement algorithm, and buffer management algorithm. The improvement of system performance through an increase in the size of the buffer space is demonstrated. 
It is also shown that for certain values of the other factors an increase in the size of the buffer space can cause performance to deteriorate.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "buffer manager; Buffer operating system support TODS; database management; double paging; page faults; page replacement algorithm; performance; virtual buffer; virtual memory", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Database Manager}; Mathematics of Computing --- Mathematical Software (G.4): {\bf Algorithm design and analysis}; Computer Systems Organization --- Performance of Systems (C.4)", } @Article{Donovan:1976:DSA, author = "John J. Donovan", title = "Database System Approach to Management Decision Support", journal = j-TODS, volume = "1", number = "4", pages = "344--369", month = dec, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-4/p344-donovan/p344-donovan.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-4/p344-donovan/", abstract = "Traditional intuitive methods of decision-making are no longer adequate to deal with the complex problems faced by the modern policymaker. Thus systems must be developed to provide the information and analysis necessary for the decisions which must be made. These systems are called decision support systems. Although database systems provide a key ingredient to decision support systems, the problems now facing the policymaker are different from those problems to which database systems have been applied in the past. 
The problems are usually not known in advance, they are constantly changing, and answers are needed quickly. Hence additional technologies, methodologies, and approaches must expand the traditional areas of database and operating systems research (as well as other software and hardware research) in order for them to become truly effective in supporting policymakers. \par This paper describes recent work in this area and indicates where future work is needed. Specifically the paper discusses: (1) why there exists a vital need for decision support systems; (2) examples from work in the field of energy which make explicit the characteristics which distinguish these decision support systems from traditional operational and managerial systems; (3) how an awareness of decision support systems has evolved, including a brief review of work done by others and a statement of the computational needs of decision support systems which are consistent with contemporary technology; (4) an approach which has been made to meet many of these computational needs through the development and implementation of a computational facility, the Generalized Management Information System (GMIS); and (5) the application of this computational facility to a complex and important energy problem facing New England in a typical study within the New England Energy Management Information System (NEEMIS) Project.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems; decision support systems; management applications; modeling; networking; relational; virtual machines", subject = "Information Systems --- Database Management (H.2); Information Systems --- Database Management --- Systems (H.2.4)", } @Article{McGee:1976:UCD, author = "William C. 
McGee", title = "On user criteria for data model evaluation", journal = j-TODS, volume = "1", number = "4", pages = "370--387", month = dec, year = "1976", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1976-1-4/p370-mcgee/p370-mcgee.pdf; http://www.acm.org/pubs/citations/journals/tods/1976-1-4/p370-mcgee/", abstract = "The emergence of a database technology in recent years has focused interest on the subject of data models. A data model is the class of logical data structures which a computer system or language makes available to the user for the purpose of formulating data processing applications. The diversity of computer systems and languages has resulted in a corresponding diversity of data models, and has created a problem for the user in selecting a data model which is in some sense appropriate to a given application. An evaluation procedure is needed which will allow the user to evaluate alternative models in the context of a specific set of applications. This paper takes a first step toward such a procedure by identifying the attributes of a data model which can be used as criteria for evaluating the model. Two kinds of criteria are presented: use criteria, which measure the usability of the model; and implementation criteria, which measure the implementability of the model and the efficiency of the resulting implementation. The use of the criteria is illustrated by applying them to three specific models: an $n$-ary relational model, a hierarchic model, and a network model.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data model; data model evaluation; data model selection; hierarchic model; network model; relational model", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}", } @Article{Kam:1977:MSD, author = "John B. Kam and Jeffrey D. Ullman", title = "A Model of Statistical Databases and Their Security", journal = j-TODS, volume = "2", number = "1", pages = "1--10", month = mar, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-1/p1-kam/p1-kam.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-1/p1-kam/", abstract = "Considered here, for a particular model of databases in which only information about relatively large sets of records can be obtained, is the question of whether one can from statistical information obtain information about individuals. Under the assumption that the data in the database is taken from arbitrary integers, it is shown that essentially nothing can be inferred. It is also shown that when the values are known to be imprecise in some fixed range, one can often deduce the values of individual records.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "compromisability; data security; linear independence; statistical database; vector space", subject = "Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}; Information Systems --- Database Management --- General (H.2.0): {\bf Security, integrity, and protection**}", } @Article{Bayer:1977:PBT, author = "Rudolf Bayer and Karl Unterauer", title = "Prefix {B}-trees", journal = j-TODS, volume = "2", number = "1", pages = "11--26", month = mar, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: IBM Yorktown, Technical Report RJ1796, Jun. 1976.", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-1/p11-bayer/p11-bayer.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-1/p11-bayer/", abstract = "Two modifications of $B$-trees are described, simple prefix $B$-trees and prefix $B$-trees. Both store only parts of keys, namely prefixes, in the index part of a $B$ *-tree. In simple prefix $B$-trees those prefixes are selected carefully to minimize their length. In prefix $B$-trees the prefixes need not be fully stored, but are reconstructed as the tree is searched. Prefix $B$-trees are designed to combine some of the advantages of $B$-trees, digital search trees, and key compression techniques while reducing the processing overhead of compression techniques.", acknowledgement = ack-nhfb, annote = "Index Btree structures can easily be compressed.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "truncation compression TODS", subject = "Data --- Data Structures (E.1): {\bf Trees}", } @Article{Schkolnick:1977:CAH, author = "Mario Schkolnick", title = "A Clustering Algorithm for Hierarchical Structures", journal = j-TODS, volume = "2", number = "1", pages = "27--44", month = mar, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 10 09:36:45 1996", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", annote = "Optimal file partitioning, applied to IMS.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Yao:1977:ABM, author = "S. B. Yao", title = "An Attribute Based Model for Database Access Cost Analysis", journal = j-TODS, volume = "2", number = "1", pages = "45--67", month = mar, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in \cite{Yao:1977:ABA}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-1/p45-yao/p45-yao.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-1/p45-yao/", abstract = "A generalized model for physical database organizations is presented. Existing database organizations are shown to fit easily into the model as special cases. Generalized access algorithms and cost equations associated with the model are developed and analyzed. 
The model provides a general design framework in which the distinguishing properties of database organizations are made explicit and their performances can be compared.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "B-tree; database model; database organization; database performance; estimation approximation TODS; evaluation; index organization; index sequential; inverted file; multilist", subject = "Information Systems --- Database Management --- Logical Design (H.2.1); Information Systems --- Information Storage and Retrieval --- Content Analysis and Indexing (H.3.1); Data --- Data Structures (E.1): {\bf Trees}", } @Article{Anderson:1977:MCS, author = "Henry D. Anderson and P. Bruce Berra", title = "Minimum Cost Selection of Secondary Indexes for Formatted Files", journal = j-TODS, volume = "2", number = "1", pages = "68--90", month = mar, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-1/p68-anderson/p68-anderson.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-1/p68-anderson/", abstract = "Secondary indexes are often used in database management systems for secondary key retrieval. Although their use can improve retrieval time significantly, the cost of index maintenance and storage increases the overhead of the file processing application. The optimal set of indexed secondary keys for a particular application depends on a number of application dependent factors. 
In this paper a cost function is developed for the evaluation of candidate indexing choices and applied to the optimization of index selection. Factors accounted for include file size, the relative rates of retrieval and maintenance and the distribution of retrieval and maintenance over the candidate keys, index structure, and system charging rates. Among the results demonstrated are the increased effectiveness of secondary indexes for large files, the effect of the relative rates of retrieval and maintenance, the greater cost of allowing for arbitrarily formulated queries, and the impact on cost of the use of different index structures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "access methods; access path; Boolean query; cost function; data management; database; file design; file organization; inverted file; inverted index; maintenance; optimization; retrieval; secondary index; secondary key; secondary key access", subject = "Information Systems --- Information Storage and Retrieval --- Content Analysis and Indexing (H.3.1): {\bf Indexing methods}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}; Information Systems --- Database Management (H.2)", } @Article{Lorie:1977:PIL, author = "Raymond A. 
Lorie", title = "Physical Integrity in a Large Segmented Database", journal = j-TODS, volume = "2", number = "1", pages = "91--104", month = mar, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-1/p91-lorie/p91-lorie.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-1/p91-lorie/", abstract = "A database system can generally be divided into three major components. One component supports the logical database as seen by the user. Another component maps the information into physical records. The third component, called the storage component, is responsible for mapping these records onto auxiliary storage (generally disks) and controlling their transfer to and from main storage.\par This paper is primarily concerned with the implementation of a storage component. It considers a simple and classical interface to the storage component: Seen at this level the database is a collection of segments. Each segment is a linear address space.\par A recovery scheme is first proposed for system failure (hardware or software error which causes the contents of main storage to be lost). It is based on maintaining a dual mapping between pages and their location on disk. One mapping represents the current state of a segment being modified; the other represents a previous backup state. At any time the backup state can be replaced by the current state without any data merging. Procedures for segment modification, save, and restore are analyzed. Another section proposes a facility for protection against damage to the auxiliary storage itself. 
It is shown how such protection can be obtained by copying on a tape (checkpoint) only those pages that have been modified since the last checkpoint.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "checkpoint-restart; database; recovery; storage management", subject = "Information Systems --- Database Management --- General (H.2.0): {\bf Security, integrity, and protection**}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2); Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Recovery and restart}", } @Article{Smith:1977:DAA, author = "John Miles Smith and Diane C. P. Smith", title = "Database abstractions: Aggregation and Generalization", journal = j-TODS, volume = "2", number = "2", pages = "105--133", month = jun, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Object/Nierstrasz.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-2/p105-smith/p105-smith.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-2/p105-smith/", abstract = "Two kinds of abstraction that are fundamentally important in database design and usage are defined. Aggregation is an abstraction which turns a relationship between objects into an aggregate object. Generalization is an abstraction which turns a class of objects into a generic object. It is suggested that all objects (individual, aggregate, generic) should be given uniform treatment in models of the real world. A new data type, called generic, is developed as a primitive for defining such models. 
Models defined with this primitive are structured as a set of aggregation hierarchies intersecting with a set of generalization hierarchies. Abstract objects occur at the points of intersection. This high level structure provides a discipline for the organization of relational databases. In particular this discipline allows: (i) an important class of views to be integrated and maintained; (ii) stability of data and programs under certain evolutionary changes; (iii) easier understanding of complex models and more natural {\em query formulation;\/} (iv) {\em a more systematic approach to database design;\/} (v) {\em more optimization\/} to be performed at lower implementation levels. The generic type is formalized by a set of invariant properties. These properties should be satisfied by all relations in a database if abstractions are to be preserved. A triggering mechanism for automatically maintaining these invariants during update operations is proposed. A simple mapping of aggregation/generalization hierarchies onto owner-coupled set structures is given.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "aggregation; data abstraction; data model; data type; database design; dblit data abstraction; generalization; integrity constraints; knowledge representation; relational database", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Software --- Software Engineering --- Software Architectures (D.2.11): {\bf Data abstraction}", } @Article{Shu:1977:EDE, author = "N. C. Shu and B. C. Housel and R. W. Taylor and S. P. Ghosh and V. Y. 
Lum", title = "{EXPRESS}: a data {EXtraction, Processing, and Restructuring System}", journal = j-TODS, volume = "2", number = "2", pages = "134--174", month = jun, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-2/p134-shu/p134-shu.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-2/p134-shu/", abstract = "EXPRESS is an experimental prototype data translation system which can access a wide variety of data and restructure it for new uses. The system is driven by two very high level nonprocedural languages: DEFINE for data description and CONVERT for data restructuring. Program generation and cooperating process techniques are used to achieve efficient operation.\par This paper describes the design and implementation of EXPRESS. DEFINE and CONVERT are summarized and the implementation architecture presented.\par The DEFINE description is compiled into a customized PL/1 program for accessing source data. The restructuring specified in CONVERT is compiled into a set of customized PL/1 procedures to derive multiple target files from multiple input files. Job steps and job control statements are generated automatically. During execution, the generated procedures run under control of a process supervisor, which coordinates buffer management and handles file allocation, deallocation, and all input/output requests.\par The architecture of EXPRESS allows efficiency in execution by avoiding unnecessary secondary storage references while at the same time allowing the individual procedures to be independent of each other. Its modular structure permits the system to be extended or transferred to another environment easily.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data conversion; data description languages; data manipulation languages; data restructuring; data translation; file conversion; program generation; very high level languages", subject = "Information Systems --- Database Management --- Heterogeneous Databases (H.2.5): {\bf Data translation**}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Database Management --- Languages (H.2.3)", } @Article{Ozkarahan:1977:PER, author = "E. A. Ozkarahan and S. A. Schuster and K. C. Sevcik", title = "Performance Evaluation of a Relational Associative Processor", journal = j-TODS, volume = "2", number = "2", pages = "175--195", month = jun, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-2/p175-ozkarahan/p175-ozkarahan.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-2/p175-ozkarahan/", abstract = "An associative processor called RAP has been designed to provide hardware support for the use and manipulation of databases. RAP is particularly suited for supporting relational databases. In this paper, the relational operations provided by the RAP hardware are described, and a representative approach to providing the same relational operations with conventional software and hardware is devised. Analytic models are constructed for RAP and the conventional system. The execution times of several of the operations are shown to be vastly improved with RAP for large relations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "associative processors; database machines; performance evaluation; RAP hardware support database machine TODS; relational databases", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Hardware --- Control Structures and Microprogramming --- Control Structure Performance Analysis and Design Aids (B.1.2)", } @Article{Brice:1977:EPD, author = "Richard S. Brice and Stephen W. Sherman", title = "An Extension on the Performance of a Database Manager in a Virtual Memory System Using Partially Locked Virtual Buffers", journal = j-TODS, volume = "2", number = "2", pages = "196--207", month = jun, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-2/p196-brice/p196-brice.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-2/p196-brice/", abstract = "Buffer pools are created and managed in database systems in order to reduce the total number of accesses to the I/O devices. In systems using virtual memory, any reduction in I/O accesses may be accompanied by an increase in paging. The effects of these factors on system performance are quantified, where system performance is a function of page faults and database accesses to the I/O devices. A previous study of this phenomenon is extended through the analysis of empirical data gathered in a multifactor experiment. In this study memory is partitioned between the program and the buffer so that the impact of the controlled factors can be more effectively evaluated. 
It is possible to improve system performance through the use of different paging algorithms in the program partition and the buffer partition. Also, the effects on system performance as the virtual buffer size is increased beyond the real memory allocated to the buffer partition are investigated.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "buffer manager; database management; double paging; locked buffer; page faults; page replacement algorithm; performance; pinning fixing TODS; virtual buffer; virtual memory", subject = "Hardware --- Control Structures and Microprogramming --- Control Structure Performance Analysis and Design Aids (B.1.2); Information Systems --- Database Management --- Systems (H.2.4): {\bf Database Manager}", } @Article{Lohman:1977:OPB, author = "Guy M. Lohman and John A. Muckstadt", title = "Optimal Policy for Batch Operations: Backup, Checkpointing, Reorganization, and Updating", journal = j-TODS, volume = "2", number = "3", pages = "209--222", month = sep, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-3/p209-lohman/p209-lohman.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-3/p209-lohman/", abstract = "Many database maintenance operations are performed periodically in batches, even in realtime systems. The purpose of this paper is to present a general model for determining the optimal frequency of these batch operations. Specifically, optimal backup, checkpointing, batch updating, and reorganization policies are derived. 
The approach used exploits inventory parallels by seeking the optimal number of items--rather than a time interval--to trigger a batch. The Renewal Reward Theorem is used to find the average long run costs for backup, recovery, and item storage, per unit time, which is then minimized to find the optimal backup policy. This approach permits far less restrictive assumptions about the update arrival process than did previous models, as well as inclusion of storage costs for the updates. The optimal checkpointing, batch updating, and reorganization policies are shown to be special cases of this optimal backup policy. The derivation of previous results as special cases of this model, and an example, demonstrate the generality of the methodology developed.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "backup frequency; batch operations; batch update; checkpoint interval; data base systems; database maintenance; file reorganization; inventory theory; real-time systems; renewal theory", subject = "Information Systems --- Database Management --- General (H.2.0)", } @Article{Wong:1977:IHT, author = "Kai C. 
Wong and Murray Edelberg", title = "Interval Hierarchies and Their Application to Predicate Files", journal = j-TODS, volume = "2", number = "3", pages = "223--232", month = sep, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-3/p223-wong/p223-wong.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-3/p223-wong/", abstract = "Predicates are used extensively in modern database systems for purposes ranging from user specification of associative accesses to data, to user-invisible system control functions such as concurrency control and data distribution. Collections of predicates, or predicate files, must be maintained and accessed efficiently. A dynamic index is described, called an interval hierarchy, which supports several important retrieval operations on files of simple conjunctive predicates. Search and maintenance algorithms for interval hierarchies are given. For a file of n predicates, typical of the kind expected in practice, these algorithms require time equal to $ O(\log n) $.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; data base systems; database system; distributed data; index; interval; predicate file", subject = "Software --- Operating Systems --- Storage Management (D.4.2): {\bf Storage hierarchies}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}; Information Systems --- Information Storage and Retrieval --- Content Analysis and Indexing (H.3.1): {\bf Indexing methods}", } @Article{Ries:1977:ELG, author = "Daniel R. Ries and Michael Stonebraker", title = "Effects of Locking Granularity in a Database Management System", journal = j-TODS, volume = "2", number = "3", pages = "233--246", month = sep, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-3/p233-ries/p233-ries.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-3/p233-ries/", abstract = "Many database systems guarantee some form of integrity control upon multiple concurrent updates by some form of locking. Some ``granule'' of the database is chosen as the unit which is individually locked, and a lock management algorithm is used to ensure integrity. Using a simulation model, this paper explores the desired size of a granule. Under a wide variety of seemingly realistic conditions, surprisingly coarse granularity is called for. 
The paper concludes with some implications of these results concerning the viability of so-called predicate locking.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency; consistency; data base systems; database management; locking granularity; multiple updates; predicate locks", subject = "Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management (H.2); Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}", } @Article{Schmidt:1977:SHL, author = "Joachim W. Schmidt", title = "Some High Level Language Constructs for Data of Type Relation", journal = j-TODS, volume = "2", number = "3", pages = "247--261", month = sep, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-3/p247-schmidt/p247-schmidt.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-3/p247-schmidt/", abstract = "For the extension of high level languages by data types of mode relation, three language constructs are proposed and discussed: a repetition statement controlled by relations, predicates as a generalization of Boolean expressions, and a constructor for relations using predicates. The language constructs are developed step by step starting with a set of elementary operations on relations. They are designed to fit into PASCAL without introducing too many additional concepts.", acknowledgement = ack-nhfb, annote = "PASCAL/R", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming languages; data type; database; high level language; language extension; nonprocedural language; relational calculus; relational model", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Languages (H.2.3)", } @Article{Fagin:1977:MVD, author = "Ronald Fagin", title = "Multi-Valued Dependencies and a New Normal Form for Relational Databases", journal = j-TODS, volume = "2", number = "3", pages = "262--278", month = sep, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-3/p262-fagin/p262-fagin.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-3/p262-fagin/", abstract = "A new type of dependency, which includes the well-known functional dependencies as a special case, is defined for relational databases. By using this concept, a new (``fourth'') normal form for relation schemata is defined. This fourth normal form is strictly stronger than Codd's ``improved third normal form'' (or ``Boyce-Codd normal form''). It is shown that every relation schema can be decomposed into a family of relation schemata in fourth normal form without loss of information (that is, the original relation can be obtained from the new relations by taking joins).", acknowledgement = ack-nhfb, annote = "Multivalued dependency is defined for relational databases, a new (``fourth'') normal form is strictly stronger than Codd's.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "3NF; 4NF; Boyce-Codd normal form; data base systems; database design; decomposition; fourth normal form; functional dependency; multivalued dependency; normalization; relational database; third normal form", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Normal forms}", } @Article{March:1977:DER, author = "Salvatore T. March and Dennis G. Severance", title = "The Determination of Efficient Record Segmentations and Blocking Factors for Shared Data Files", journal = j-TODS, volume = "2", number = "3", pages = "279--296", month = sep, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-3/p279-march/p279-march.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-3/p279-march/", abstract = "It is generally believed that 80 percent of all retrieval from a commercial database is directed at only 20 percent of the stored data items. By partitioning data items into primary and secondary record segments, storing them in physically separate files, and judiciously allocating available buffer space to the two files, it is possible to significantly reduce the average cost of information retrieval from a shared database. An analytic model, based upon knowledge of data item lengths, data access costs, and user retrieval patterns, is developed to assist an analyst with this assignment problem. 
A computationally tractable design algorithm is presented and results of its application are described.", acknowledgement = ack-nhfb, classification = "723; 901", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "bicriterion mathematical programs; branch and bound; buffer allocation; data base systems; data management; information science --- information retrieval; network flows; record design; record segmentation", subject = "Information Systems --- Database Management (H.2); Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}", } @Article{Ozkarahan:1977:AAF, author = "E. A. Ozkarahan and K. C. Sevcik", title = "Analysis of Architectural Features for Enhancing the Performance of a Database Machine", journal = j-TODS, volume = "2", number = "4", pages = "297--316", month = dec, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-4/p297-ozkarahan/p297-ozkarahan.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-4/p297-ozkarahan/", abstract = "RAP (Relational Associative Processor) is a ``back-end'' database processor that is intended to take over much of the effort of database management in a computer system. In order to enhance RAP's performance its design includes mechanisms for permitting features analogous to multiprogramming and virtual memory as in general purpose computer systems. It is the purpose of this paper to present the detailed design of these mechanisms, along with some analysis that supports their value. 
Specifically, (1) the response time provided by RAP under several scheduling disciplines involving priority by class is analyzed, (2) the cost effectiveness of the additional hardware in RAP necessary to support multiprogramming is assessed, and (3) a detailed design of the RAP virtual memory system and its monitor is presented.", acknowledgement = ack-nhfb, annote = "RAP (Relational Associative Processor) is a ``back-end database processor''; its design includes mechanisms for multiprogramming and virtual memory.", classification = "722; 723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "associative processors; computer architecture; computer architecture, hardware support TODS; data base systems; database machines; database management", subject = "Information Systems --- Database Management (H.2)", } @Article{Rissanen:1977:ICR, author = "Jorma Rissanen", title = "Independent Components of Relations", journal = j-TODS, volume = "2", number = "4", pages = "317--325", month = dec, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-4/p317-rissanen/p317-rissanen.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-4/p317-rissanen/", abstract = "In a multiattribute relation or, equivalently, a multicolumn table a certain collection of the projections can be shown to be independent in much the same way as the factors in a Cartesian product or orthogonal components of a vector. A precise notion of independence for relations is defined and studied. 
The main result states that the operator which reconstructs the original relation from its independent components is the natural join, and that independent components split the full family of functional dependencies into corresponding component families. These give an easy-to-check criterion for independence.", acknowledgement = ack-nhfb, annote = "In a multi-attribute relation a certain collection of projections can be shown to be independent. The operator which reconstructs the original relation is the natural join. Independent components split the full family of functional dependencies into corresponding component families.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; database; functional dependencies; relations", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Bonczek:1977:TGB, author = "Robert H. Bonczek and James I. Cash and Andrew B. Whinston", title = "A Transformational Grammar-Based Query Processor for Access Control in a Planning System", journal = j-TODS, volume = "2", number = "4", pages = "326--338", month = dec, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-4/p326-bonczek/p326-bonczek.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-4/p326-bonczek/", abstract = "Providing computer facilities and data availability to larger numbers of users generates increased system vulnerability which is partially offset by software security systems. 
Much too often these systems are presented as ad hoc additions to the basic data management system. One very important constituent of software security systems is the access control mechanism which may be the last resource available to prohibit unauthorized data retrieval. This paper presents a specification for an access control mechanism. The mechanism is specified in a context for use with the GPLAN decision support system by a theoretical description consistent with the formal definition of GPLAN's query language. Incorporation of the mechanism into the language guarantees it will not be an ad hoc addition. Furthermore, it provides a facile introduction of data security dictates into the language processor.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "access control; data processing; data security; database; decision support system; planning system", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Database Management --- General (H.2.0): {\bf Security, integrity, and protection**}; Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}", } @Article{Lang:1977:DBP, author = "Tom{\'a}s Lang and Christopher Wood and Eduardo B. 
Fern{\'a}ndez", title = "Database Buffer Paging in Virtual Storage Systems", journal = j-TODS, volume = "2", number = "4", pages = "339--351", month = dec, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-4/p339-lang/p339-lang.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-4/p339-lang/", abstract = "Three models, corresponding to different sets of assumptions, are analyzed to study the behavior of a database buffer in a paging environment. The models correspond to practical situations and vary in their search strategies and replacement algorithms. The variation of I/O cost with respect to buffer size is determined for the three models. The analysis is valid for arbitrary database and buffer sizes, and the I/O cost is obtained in terms of the miss ratio, the buffer size, the number of main memory pages available for the buffer, and the relative buffer and database access costs.", acknowledgement = ack-nhfb, annote = "The variation of I/O cost with respect to buffer size is determined for three models: the IMS/360 database buffer, with LRU memory replacement, and a prefix table in main memory indicating which database pages are in the VSAM buffer.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "buffer management; computer systems performance; data base systems; database performance; page replacement algorithm; virtual memory", subject = "Information Systems --- Database Management --- General (H.2.0); Information Systems --- Database Management --- Systems (H.2.4)", } @Article{Thomas:1977:VAP, author = "D. A. Thomas and B. Pagurek and R. J. Buhr", title = "Validation Algorithms for Pointer Values in {DBTG} Databases", journal = j-TODS, volume = "2", number = "4", pages = "352--369", month = dec, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-4/p352-thomas/p352-thomas.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-4/p352-thomas/", abstract = "This paper develops algorithms for verifying pointer values in DBTG (Data Base Task Group) type databases. To validate pointer implemented access paths and set structures, two algorithms are developed. The first procedure exploits the ``typed pointer'' concept employed in modern programming languages to diagnose abnormalities in directories and set instances. The second algorithm completes pointer validation by examining set instances to ensure that each DBTG set has a unique owner. Sequential processing is used by both algorithms, allowing a straightforward implementation which is efficient in both time and space. 
As presented, the algorithms are independent of implementation schema and physical structure.", acknowledgement = ack-nhfb, annote = "Type Checking algorithm detects and locates errors in the pointers which are used to represent chained and pointer array implemented sets. In addition to invalid set pointers, the algorithm has been extended to check index sequential and inverted access directories provided by EDMS.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; database integrity; database utilities; type checking; validation", subject = "Information Systems --- Database Management (H.2); Information Systems --- Database Management --- General (H.2.0): {\bf Security, integrity, and protection**}", } @Article{Claybrook:1977:FDM, author = "Billy G. Claybrook", title = "A Facility for Defining and Manipulating Generalized Data Structures", journal = j-TODS, volume = "2", number = "4", pages = "370--406", month = dec, year = "1977", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1977-2-4/p370-claybrook/p370-claybrook.pdf; http://www.acm.org/pubs/citations/journals/tods/1977-2-4/p370-claybrook/", abstract = "A data structure definition facility (DSDF) is described that provides definitions for several primitive data types, homogeneous and heterogeneous arrays, cells, stacks, queues, trees, and general lists. Each nonprimitive data structure consists of two separate entities--a head and a body. 
The head contains the entry point(s) to the body of the structure; by treating the head like a cell, the DSDF operations are capable of creating and manipulating very general data structures. A template structure is described that permits data structures to share templates.\par The primary objectives of the DSDF are: (1) to develop a definition facility that permits the programmer to explicitly define and manipulate generalized data structures in a consistent manner, (2) to detect mistakes and prevent the programmer from creating (either inadvertently or intentionally) undesirable (or illegal) data structures, (3) to provide a syntactic construction mechanism that separates the implementation of a data structure from its use in the program in which it is defined, and (4) to facilitate the development of reliable software.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data definition languages; data processing; data structure definition facility; data structures; database management", subject = "Information Systems --- Database Management (H.2); Information Systems --- Database Management --- Languages (H.2.3)", } @Article{Minker:1978:SSS, author = "Jack Minker", title = "Search Strategy and Selection Function for an Inferential Relational System", journal = j-TODS, volume = "3", number = "1", pages = "1--31", month = mar, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-1/p1-minker/p1-minker.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-1/p1-minker/", abstract = "An 
inferential relational system is one in which data in the system consists of both explicit facts and general axioms (or ``views''). The general axioms are used together with the explicit facts to derive the facts that are implicit (virtual relations) within the system. A top-down algorithm, as used in artificial intelligence work, is described to develop inferences within the system. The top-down approach starts with the query, a conjunction of relations, to be answered. Either a relational fact solves a given relation in a conjunct, or the relation is replaced by a conjunct of relations which must be solved to solve the given relation. The approach requires that one and only one relation in a conjunction be replaced (or expanded) by the given facts and general axioms. The decision to expand only a single relation is termed a selection function. It is shown for relational systems that such a restriction still guarantees that a solution to the problem will be found if one exists.\par The algorithm provides for heuristic direction in the search process. Experimental results are presented which illustrate the techniques. A bookkeeping mechanism is described which permits one to know when subproblems are solved. It further facilitates the outputting of reasons for the deductively found answer in a coherent fashion.", acknowledgement = ack-nhfb, annote = "Data in the system consists of both explicit facts and general axioms. The top-down approach starts with the query, a conjunction of relations, to be answered. Either a relational fact solves a given relation in a conjunct, or the relation is replaced by a conjunct of relations which must be solved to solve the given relation. Experimental results are presented which illustrate the techniques.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "answer and reason extraction; data base systems; heuristics; inference mechanism; logic; predicate calculus; relational databases; search strategy; selection function; top-down search; virtual relations", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Search process}", } @Article{Tuel:1978:ORP, author = "William G. {Tuel, Jr.}", title = "Optimum Reorganization Points for Linearly Growing Files", journal = j-TODS, volume = "3", number = "1", pages = "32--40", month = mar, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-1/p32-tuel/p32-tuel.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-1/p32-tuel/", abstract = "The problem of finding optimal reorganization intervals for linearly growing files is solved. An approximate reorganization policy, independent of file lifetime, is obtained. Both the optimum and approximate policies are compared to previously published results using a numerical example.", acknowledgement = ack-nhfb, annote = "The problem of finding optimal reorganization intervals for linearly growing files is solved.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing --- file organization; database; file organization; optimization; physical database design TODS, data base systems; reorganization", subject = "Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}", } @Article{Yu:1978:END, author = "C. T. Yu and W. S. Luk and M. K. Siu", title = "On the Estimation of the Number of Desired Records with Respect to a Given Query", journal = j-TODS, volume = "3", number = "1", pages = "41--56", month = mar, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-1/p41-yu/p41-yu.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-1/p41-yu/", abstract = "The importance of the estimation of the number of desired records for a given query is outlined. Two algorithms for the estimation in the ``closest neighbors problem'' are presented. The numbers of operations of the algorithms are $ O(m \ell^2) $ and $ O(m \ell) $, where $m$ is the number of clusters and $ \ell $ is the ``length'' of the query.", acknowledgement = ack-nhfb, annote = "Two Algorithms for the estimation in the `closest neighbors problem'", classification = "901", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "closest neighbors; database; estimate; information science, CTYu selectivity TODS; query", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Su:1978:CCS, author = "Stanley Y. W. Su and Ahmed Emam", title = "{CASDAL}: {{\em CAS\/}SM}'s {{\em DA\/}}ta {{\em L\/}}anguage", journal = j-TODS, volume = "3", number = "1", pages = "57--91", month = mar, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1978-3-1/p57-su/", abstract = "CASDAL is a high level data language designed and implemented for the database machine CASSM. The language is used for the manipulation and maintenance of a database using an unnormalized (hierarchically structured) relational data model. It also has facilities to define, modify, and maintain the data model definition. The uniqueness of CASDAL lies in its power to specify complex operations in terms of several new language constructs and its concepts of tagging or marking tuples and of matching values when walking from relation to relation. The language is a result of a top-down design and development effort for a database machine in which high level language constructs are directly supported by the hardware. 
This paper (1) gives justifications for the use of an unnormalized relational model on which the language is based, (2) presents the CASDAL language constructs with examples, and (3) describes CASSM's architecture and hardware primitives which match closely with the high level language constructs and facilitate the translation process. This paper also attempts to show how the efficiency of the language and the translation task can be achieved and simplified in a system in which the language is the result of a top-down system design and development.", acknowledgement = ack-nhfb, annote = "CASDAL is a high level data language for the database machine CASSM. It uses an unnormalized (hierarchically structured) relational data model. This paper (1) justifies the use of this model (2) presents the Casdal language constructs with examples, and (3) describes CASSM's architecture.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "associative memory; computer programming languages; data language; database; nonprocedural language; query language; relational model; SYWSu hardware support database machine TODS, data base systems", subject = "Information Systems --- Database Management --- Languages (H.2.3); Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Chin:1978:SSD, author = "Francis Y. 
Chin", title = "Security in Statistical Databases for Queries with Small Counts", journal = j-TODS, volume = "3", number = "1", pages = "92--104", month = mar, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-1/p92-chin/p92-chin.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-1/p92-chin/", abstract = "The security problem of statistical databases containing anonymous but individual records which may be evaluated by queries about sums and averages is considered. A model, more realistic than the previous ones, is proposed, in which nonexisting records for some keys can be allowed. Under the assumption that the system protects the individual's information by the well-known technique which avoids publishing summaries with small counts, several properties about the system and a necessary and sufficient condition for compromising the database have been derived. The minimum number of queries needed to compromise the database is also discussed.", acknowledgement = ack-nhfb, annote = "Under the assumption that the system protects the individual's information by the technique which avoids publishing summaries with small counts, properties about the system and a necessary and sufficient condition for compromising the database have been derived.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "compromisability; data base systems; data processing --- security of data; data security; protection; statistical databases", subject = "Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Security, integrity, and protection}", } @Article{Hendrix:1978:DNL, author = "Gary G. Hendrix and Earl D. Sacerdoti and Daniel Sagalowicz and Jonathan Slocum", title = "Developing a Natural Language Interface to Complex Data", journal = j-TODS, volume = "3", number = "2", pages = "105--147", month = jun, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Ai/Ai.misc.bib; Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-2/p105-hendrix/p105-hendrix.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-2/p105-hendrix/", abstract = "Aspects of an intelligent interface that provides natural language access to a large body of data distributed over a computer network are described. The overall system architecture is presented, showing how a user is buffered from the actual database management systems (DBMSs) by three layers of insulating components. These layers operate in series to convert natural language queries into calls to DBMSs at remote sites. Attention is then focused on the first of the insulating components, the natural language system. 
A pragmatic approach to language access that has proved useful for building interfaces to databases is described and illustrated by examples. Special language features that increase system usability, such as spelling correction, processing of incomplete inputs, and run-time system personalization, are also discussed. The language system is contrasted with other work in applied natural language processing, and the system's limitations are analyzed.", acknowledgement = ack-nhfb, classification = "722; 723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; database access; human engineering; intelligent access semantic grammar human engineering run-time personalization, computer interfaces; intelligent interface; natural language; Natural Language, Intelligent Interface, Database Access, Semantic Grammar, Human Engineering, Runtime Personalization; run-time personalization; semantic grammar", subject = "Information Systems --- Database Management --- Languages (H.2.3); Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}", } @Article{Langdon:1978:NAP, author = "Glen G. 
{Langdon, Jr.}", title = "A Note on Associative Processors for Data Management", journal = j-TODS, volume = "3", number = "2", pages = "148--158", month = jun, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-2/p148-langdon/p148-langdon.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-2/p148-langdon/", abstract = "Associative ``logic-per-track'' processors for data management are examined from a technological and engineering point of view. Architectural and design decisions are discussed. Some alternatives to the design of comparators, garbage collection, and domain extraction for architectures like the Relational Associative Processor (RAP) are offered.", acknowledgement = ack-nhfb, annote = "Associative ``logic-per-track'' processors for data management are examined from a technological and engineering point of view (RAP).", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "associative processors; computer operating systems; data base systems, hardware support database machine TODS; database machines", subject = "Information Systems --- Database Management (H.2)", } @Article{Kluge:1978:DFM, author = "Werner E. 
Kluge", title = "Data File Management in Shift-Register Memories", journal = j-TODS, volume = "3", number = "2", pages = "159--177", month = jun, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-2/p159-kluge/p159-kluge.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-2/p159-kluge/", abstract = "The paper proposes a shift-register memory, structured as a two-dimensional array of uniform shift-register loops which are linked by flow-steering switches, whose switch control scheme is tailored to perform with great efficiency data management operations on sequentially organized files. The memory operates in a linear input/output mode to perform record insertion, deletion, and relocation on an existing file, and in a sublinear mode for rapid internal file movement to expedite file positioning and record retrieval and update operations.\par The memory, implemented as a large capacity charge-coupled device or magnetic domain memory, permits efficient data management on very large databases at the level of secondary storage and lends itself to applications as a universal disk replacement, particularly in database computers.", acknowledgement = ack-nhfb, annote = "Shift-register memory, structured as a two-dimensional array tailored to perform with great efficiency data management operations on sequentially organized files", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data transformations; deletion; hardware support database machine TODS, computer operating systems; insertion; LIFO/FIFO operation modes; management of sequentially organized files; record retrieval; relocation; shift-register memories; updating", subject = "Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3)", } @Article{Rosenkrantz:1978:SLC, author = "David J. Rosenkrantz and Richard E. Stearns and Philip M. {Lewis, II}", title = "System Level Concurrency Control for Distributed Database Systems", journal = j-TODS, volume = "3", number = "2", pages = "178--198", month = jun, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-2/p178-rosenkrantz/p178-rosenkrantz.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-2/p178-rosenkrantz/", abstract = "A distributed database system is one in which the database is spread among several sites and application programs ``move'' from site to site to access and update the data they need. The concurrency control is that portion of the system that responds to the read and write requests of the application programs. Its job is to maintain the global consistency of the distributed database while ensuring that the termination of the application programs is not prevented by phenomena such as deadlock. 
We assume each individual site has its own local concurrency control which responds to requests at that site and can only communicate with concurrency controls at other sites when an application program moves from site to site, terminates, or aborts.\par This paper presents designs for several distributed concurrency controls and demonstrates that they work correctly. It also investigates some of the implications of global consistency of a distributed database and discusses phenomena that can prevent termination of application programs.", acknowledgement = ack-nhfb, annote = "Later arriving transactions may be aborted if not yet in the commit stage.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency; consistency; data base systems; database; deadlock; deadly embrace; distributed; integrity; lock; readers and writers; restart; rollback; transaction", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}", } @Article{Delobel:1978:NHD, author = "Claude Delobel", title = "Normalization and Hierarchical Dependencies in the Relational Data Model", journal = j-TODS, volume = "3", number = "3", pages = "201--222", month = sep, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-3/p201-delobel/p201-delobel.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-3/p201-delobel/", abstract = "The purpose of this paper is to present a new approach to the conceptual design of logical 
schemata for relational databases. One-to-one, one-to-many, and many-to-many relationships between the attributes of database relations are modeled by means of functional dependencies and multivalued dependencies. A new type of dependency is introduced: first-order hierarchical decomposition. The properties of this new type of dependency are studied and related to the normalization process of relations. The relationship between the concept of first-order hierarchical decomposition and the notion of hierarchical organization of data is discussed through the normalization process.", acknowledgement = ack-nhfb, annote = "One-to-one, one-to-many relationships between the attributes of database relations are modeled by means of functional dependencies and multivalued dependencies. A new type of dependency is first-order hierarchical.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; data model; first-order hierarchical dependency; functional dependency; hierarchical schema; multivalued dependency; normalization process; relational database; relational model", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}", } @Article{Smith:1978:SPD, author = "Alan Jay Smith", title = "Sequentiality and Prefetching in Database Systems", journal = j-TODS, volume = "3", number = "3", pages = "223--247", month = sep, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; 
http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-3/p223-smith/p223-smith.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-3/p223-smith/", abstract = "Sequentiality of access is an inherent characteristic of many database systems. We use this observation to develop an algorithm which selectively prefetches data blocks ahead of the point of reference. The number of blocks prefetched is chosen by using the empirical run length distribution and conditioning on the observed number of sequential block references immediately preceding reference to the current block. The optimal number of blocks to prefetch is estimated as a function of a number of ``costs,'' including the cost of accessing a block not resident in the buffer (a miss), the cost of fetching additional data blocks at fault times, and the cost of fetching blocks that are never referenced. We estimate this latter cost, described as memory pollution, in two ways. We consider the treatment (in the replacement algorithm) of prefetched blocks, whether they are treated as referenced or not, and find that it makes very little difference. Trace data taken from an operational IMS database system is analyzed and the results are presented. We show how to determine optimal block sizes. We find that anticipatory fetching of data can lead to significant improvements in system operation.", acknowledgement = ack-nhfb, annote = "An algorithm which selectively prefetches data blocks ahead of the point of reference. The optimal number of blocks to prefetch is estimated as a function of a number of costs, including the cost of accessing a block not resident in the buffer (a miss), the cost of fetching additional data blocks at fault times, and the cost of fetching blocks that are never referenced.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "buffer management; database systems; dynamic programming; IMS; paging; prefetching; read-ahead caches caching buffer management TODS, data base systems; sequentiality", subject = "Information Systems --- Database Management --- Systems (H.2.4)", } @Article{Schlageter:1978:PSD, author = "Gunter Schlageter", title = "Process Synchronization in Database Systems", journal = j-TODS, volume = "3", number = "3", pages = "248--271", month = sep, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See errata report in \cite{Bernstein:1979:CPS}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-3/p248-schlageter/p248-schlageter.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-3/p248-schlageter/", abstract = "The problem of process synchronization in database systems is analyzed in a strictly systematic way, on a rather abstract level; the abstraction is chosen such that the essential characteristics of the problem can be distinctly modeled and investigated. Using a small set of concepts, a consistent description of the whole problem is developed; many widely used, but only vaguely defined, notions are defined exactly within this framework. The abstract treatment of the problem immediately leads to practically useful insights with respect to possible solutions, although implementational aspects are not discussed in detail.", acknowledgement = ack-nhfb, annote = "Process synchronization in database systems is analyzed on a rather abstract level. [see Bernstein for comments]", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database consistency; database systems; integrity; locking; operating system support TODS, data base systems; parallel process systems; process synchronization", subject = "Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management (H.2)", } @Article{Hollaar:1978:SMP, author = "Lee A. Hollaar", title = "Specialized Merge Processor Networks for Combining Sorted Lists", journal = j-TODS, volume = "3", number = "3", pages = "272--284", month = sep, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-3/p272-hollaar/p272-hollaar.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-3/p272-hollaar/", abstract = "In inverted file database systems, index lists consisting of pointers to items within the database are combined to form a list of items which potentially satisfy a user's query. This list merging is similar to the common data processing operation of combining two or more sorted input files to form a sorted output file, and generally represents a large percentage of the computer time used by the retrieval system. Unfortunately, a general purpose digital computer is better suited for complicated numeric processing rather than the simple combining of data. 
The overhead of adjusting and checking pointers, aligning data, and testing for completion of the operation overwhelm the processing of the data.\par A specialized processor can perform most of these overhead operations in parallel with the processing of the data, thereby offering speed increases by a factor from 10 to 100 over conventional computers, depending on whether a higher speed memory is used for storing the lists. These processors can also be combined into networks capable of directly forming the result of a complex expression, with another order of magnitude speed increase possible. The programming and operation of these processors and networks is discussed, and comparisons are made with the speed and efficiency of conventional general purpose computers.", acknowledgement = ack-nhfb, classification = "722; 723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "backend processors; binary tree networks; computer architecture --- program processors; computer system architecture; full text retrieval systems; hardware support database machine TODS, data base systems; inverted file databases; nonnumeric processing; pipelined networks; sorted list merging", subject = "Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3); Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}", } @Article{Haerder:1978:IGA, author = "Theo Haerder", title = "Implementing a Generalized Access Path Structure for a Relational Database System", journal = j-TODS, volume = "3", number = "3", pages = "285--298", month = sep, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; 
http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-3/p285-haerder/p285-haerder.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-3/p285-haerder/", abstract = "A new kind of implementation technique for access paths connecting sets of tuples qualified by attribute values is described. It combines the advantages of pointer chain and multilevel index implementation techniques. Compared to these structures the generalized access path structure is at least competitive in performing retrieval and update operations, while a considerable storage space saving is gained. Some additional features of this structure support $m$-way joins and the evaluation of multirelation queries, and allow efficient checks of integrity assertions and simple reorganization schemes.", acknowledgement = ack-nhfb, annote = "Implementation technique for access paths connecting sets of tuples qualified by attribute values combines the advantages of pointer chains and multilevel indexes. Features of this structure support m-way joins.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems, Harder multi-relation indices TODS", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Yu:1978:PP, author = "C. T. Yu and M. K. Siu and K. 
Lam", title = "On a Partitioning Problem", journal = j-TODS, volume = "3", number = "3", pages = "299--309", month = sep, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-3/p299-yu/p299-yu.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-3/p299-yu/", abstract = "This paper investigates the problem of locating a set of ``boundary points'' of a large number of records. Conceptually, the boundary points partition the records into subsets of roughly the same number of elements, such that the key values of the records in one subset are all smaller or all larger than those of the records in another subset. We guess the locations of the boundary points by linear interpolation and check their accuracy by reading the key values of the records on one pass. This process is repeated until all boundary points are determined. Clearly, this problem can also be solved by performing an external tape sort. Both analytical and empirical results indicate that the number of passes required is small in comparison with that in an external tape sort. This kind of record partitioning may be of interest in setting up a statistical database system.", acknowledgement = ack-nhfb, annote = "Boundary points partition the records into subsets of roughly the same number of elements. We guess the locations of the boundary points by linear interpolation and check their accuracy by reading the key values of the records on one pass. This process is repeated until all boundary points are determined.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "CTYu TODS, data base systems; external sort; key value; partition; passes; tape probability", subject = "Computing Methodologies --- Image Processing And Computer Vision --- Segmentation (I.4.6): {\bf Region growing, partitioning}", } @Article{Fagin:1978:AM, author = "Ronald Fagin", title = "On an Authorization Mechanism", journal = j-TODS, volume = "3", number = "3", pages = "310--319", month = sep, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-3/p310-fagin/p310-fagin.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-3/p310-fagin/", abstract = "Griffiths and Wade ({\em ACM Trans. Database Syst. 1,3}, (Sept. 1976), 242-255) have defined a dynamic authorization mechanism that goes beyond the traditional password approach. A database user can grant or revoke privileges (such as to read, insert, or delete) on a file that he has created. Furthermore, he can authorize others to grant these same privileges. The database management system keeps track of a directed graph, emanating from the creator, of granted privileges. The nodes of the graph correspond to users, and the edges (each of which is labeled with a timestamp) correspond to grants. The edges are of two types, corresponding to whether or not the recipient of the grant has been given the option to make further grants of this privilege. Furthermore, for each pair $ A, B $ of nodes, there can be no more than one edge of each type from $A$ to $B$. 
We modify this approach by allowing graphs in which there can be multiple edges of each type from one node to another. We prove correctness (in a certain strong sense) for our modified authorization mechanism. Further, we show by example that under the original mechanism, the system might forbid some user from exercising or granting a privilege that he ``should'' be allowed to exercise or grant.", acknowledgement = ack-nhfb, annote = "We prove correctness for our modified authorization mechanism", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "access control; authorization; data base systems; database; privacy; proof of correctness; protection; revocation; security", subject = "Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Security, integrity, and protection}", } @Article{Salton:1978:GSC, author = "G. Salton and A. Wong", title = "Generation and Search of Clustered Files", journal = j-TODS, volume = "3", number = "4", pages = "321--346", month = dec, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-4/p321-salton/p321-salton.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-4/p321-salton/", abstract = "A classified, or clustered file is one where related, or similar records are grouped into classes, or clusters of items in such a way that all items within a cluster are jointly retrievable. Clustered files are easily adapted to broad and narrow search strategies, and simple file updating methods are available. 
An inexpensive file clustering method applicable to large files is given together with appropriate file search methods. An abstract model is then introduced to predict the retrieval effectiveness of various search methods in a clustered file environment. Experimental evidence is included to test the versatility of the model and to demonstrate the role of various parameters in the cluster search process.", acknowledgement = ack-nhfb, annote = "Automatic classification for information retrieval", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "automatic classification; cluster searching; clustered files; data processing; fast classification; file organization; probabilistic models", subject = "Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Clustering}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Search process}", } @Article{Banerjee:1978:CCD, author = "Jayanta Banerjee and Richard I. Baum and David K. 
Hsiao", title = "Concepts and Capabilities of a Database Computer", journal = j-TODS, volume = "3", number = "4", pages = "347--384", month = dec, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-4/p347-banerjee/p347-banerjee.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-4/p347-banerjee/", abstract = "The concepts and capabilities of a database computer (DBC) are given in this paper. The proposed design overcomes many of the traditional problems of database system software and is one of the first to describe a complete data-secure computer capable of handling large databases.\par This paper begins by characterizing the major problems facing today's database system designers. These problems are intrinsically related to the nature of conventional hardware and can only be solved by introducing new architectural concepts. Several such concepts are brought to bear in the later sections of this paper. These architectural principles have a major impact upon the design of the system and so they are discussed in some detail. A key aspect of these principles is that they can be implemented with near-term technology. The rest of the paper is devoted to the functional characteristics and the theory of operation of the DBC. The theory of operation is based on a series of abstract models of the components and data structures employed by the DBC. These models are used to illustrate how the DBC performs access operations, manages data structures and security specifications, and enforces security requirements. Short Algol-like algorithms are used to show how these operations are carried out. 
This part of the paper concludes with a high-level description of the DBC organization. The actual details of the DBC hardware are quite involved and so their presentation is not the subject of this paper.\par A sample database is included in the Appendix to illustrate the working of the security and clustering mechanisms of the DBC.", acknowledgement = ack-nhfb, annote-1 = "The concepts of a database computer (DBC) are given. The theory of operation is based on abstract models. The DBC performs access operations, manages data structures and security specifications.", annote-2 = "The correct author order (from the running heads and table of contents) is Banerjee, Baum, Hsiao: the article cover page has Banerjee, Hsiao, Baum, because the first two share a common address.", classification = "722; 723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "clustering; content-addressable memory; data base systems; database computers; hardware support machine TODS, computer architecture; keywords; mass memory; performance; security; structure memory", subject = "Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Clustering}", } @Article{Bradley:1978:EOC, author = "J. 
Bradley", title = "An Extended Owner-Coupled Set Data Model and Predicate Calculus for Database Management", journal = j-TODS, volume = "3", number = "4", pages = "385--416", month = dec, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-4/p385-bradley/p385-bradley.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-4/p385-bradley/", abstract = "A data model is presented, based on the extension of the concept of a DBTG owner-coupled set to permit {\em static\/} and {\em dynamic\/} sets and a new kind of set referred to as a {\em virtual\/} set. The notion of {\em connection fields\/} is introduced, and it is shown how connection fields may be used to construct derived information bearing set names, and hence permit the specification of (dynamic) sets which are not predeclared in a schema. Virtual sets are shown to reflect the functional dependencies which can exist within a file. A technique which permits the data model to be fully described diagrammatically by {\em extended Bachman diagrams\/} is described. A predicate calculus for manipulation of this data model is presented. Expressions written in this calculus are compared with corresponding expressions in a relational predicate calculus, DSL ALPHA. An argument for the relational completeness of the language is given.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Codasyl DBTG; connection field; data base systems; DSL ALPHA; dynamic set; extended Bachman diagram; extended owner-coupled set data model; extended owner-coupled set predicate calculus; functional dependency; information bearing set name; owner-coupled set; static set; virtual set", subject = "Information Systems --- Database Management (H.2)", } @Article{Shneiderman:1978:IHF, author = "Ben Shneiderman", title = "Improving the Human Factors Aspect of Database Interactions", journal = j-TODS, volume = "3", number = "4", pages = "417--439", month = dec, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-4/p417-shneiderman/p417-shneiderman.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-4/p417-shneiderman/", abstract = "The widespread dissemination of computer and information systems to nontechnically trained individuals requires a new approach to the design and development of database interfaces. This paper provides the motivational background for controlled psychological experimentation in exploring the person\slash machine interface. Frameworks for the reductionist approach are given, research methods discussed, research issues presented, and a small experiment is offered as an example of what can be accomplished. This experiment is a comparison of natural and artificial language query facilities.
Although subjects posed approximately equal numbers of valid queries with either facility, natural language users made significantly more invalid queries which could not be answered from the database that was described.", acknowledgement = ack-nhfb, classification = "461; 723; 901", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; data models; database systems; experimentation; human engineering; human factors; natural language interfaces; psychology; query languages; systems science and cybernetics --- man machine systems", subject = "Information Systems --- Models and Principles --- User/Machine Systems (H.1.2): {\bf Human factors}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}; Information Systems --- Database Management --- Systems (H.2.4)", } @Article{Comer:1978:DOI, author = "Douglas Comer", title = "The Difficulty of Optimum Index Selection", journal = j-TODS, volume = "3", number = "4", pages = "440--445", month = dec, year = "1978", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1978-3-4/p440-comer/p440-comer.pdf; http://www.acm.org/pubs/citations/journals/tods/1978-3-4/p440-comer/", abstract = "Given a file on a secondary store in which each record has several attributes, it is usually advantageous to build an index mechanism to decrease the cost of conducting transactions to the file. 
The problem of selecting attributes over which to index has been studied in the context of various storage structures and access assumptions. One algorithm to make an optimum index selection requires $ 2^k $ steps in the worst case, where $k$ is the number of attributes in the file. We examine the question of whether a more efficient algorithm might exist and show that even under a simple cost criterion the problem is computationally difficult in a precise sense. Our results extend directly to other related problems where the cost of the index depends on fixed values which are assigned to each attribute. Some practical implications are discussed.", acknowledgement = ack-nhfb, annote = "Theorem: optimum index selection problem OISP is NP-complete for files of degree $ d \ge 2 $.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "attribute selection; complexity; index selection; physical database design; secondary index", subject = "Information Systems --- Information Storage and Retrieval --- Content Analysis and Indexing (H.3.1): {\bf Indexing methods}", } @Article{Babb:1979:IRD, author = "E. Babb", title = "Implementing a Relational Database by Means of Specialized Hardware", journal = j-TODS, volume = "4", number = "1", pages = "1--29", month = mar, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-1/p1-babb/p1-babb.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-1/p1-babb/", abstract = "New hardware is described which allows the rapid execution of queries demanding the joining of physically stored relations.
The main feature of the hardware is a special store which can rapidly remember or recall data. This data might be pointers from one file to another, in which case the memory helps with queries on joins of files. Alternatively, the memory can help remove redundant data during projection[s??], giving a considerable speed advantage over conventional hardware.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "bit array; CAFS; content addressing; database; hardware support machine bit vector filter probabilistic semi-join TODS, data base systems; hashing; information retrieval; join; projection; relational model; selection; special hardware", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Beeri:1979:CPR, author = "Catriel Beeri and Philip A. Bernstein", title = "Computational Problems Related to the Design of Normal Form Relational Schemas", journal = j-TODS, volume = "4", number = "1", pages = "30--59", month = mar, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Compiler/prog.lang.theory.bib; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: errata in ACM Transactions on Database Systems, Vol. 4 No. 3, Sep. 1979, pp. 396.", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-1/p30-beeri/p30-beeri.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-1/p30-beeri/", abstract = "Problems related to functional dependencies and the algorithmic design of relational schemas are examined. 
Specifically, the following results are presented: (1) a tree model of derivations of functional dependencies from other functional dependencies; (2) a linear-time algorithm to test if a functional dependency is in the closure of a set of functional dependencies; (3) a quadratic-time implementation of Bernstein's third normal form schema synthesis algorithm. \par Furthermore, it is shown that most interesting algorithmic questions about Boyce-Codd normal form and keys are {\em NP\/}-complete and are therefore probably not amenable to fast algorithmic solutions.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems", subject = "Information Systems --- Database Management --- Logical Design (H.2.1); Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Lockemann:1979:DAD, author = "Peter C. Lockemann and Heinrich C. Mayr and Wolfgang H. Weil and Wolfgang H. Wohlleber", title = "Data Abstractions for Database Systems", journal = j-TODS, volume = "4", number = "1", pages = "60--75", month = mar, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-1/p60-lockemann/p60-lockemann.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-1/p60-lockemann/", abstract = "Data abstractions were originally conceived as a specification tool in programming. They also appear to be useful for exploring and explaining the capabilities and shortcomings of the data definition and manipulation facilities of present-day database systems. 
Moreover they may lead to new approaches to the design of these facilities. In the first section the paper introduces an axiomatic method for specifying data abstractions and, on that basis, gives precise meaning to familiar notions such as data model, data type, and database schema. In a second step the various possibilities for specifying data types within a given data model are examined and illustrated. It is shown that data types prescribe the individual operations that are allowed within a database. Finally, some additions to the method are discussed which permit the formulation of interrelationships between arbitrary operations.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "abstract data type; data abstraction; data base systems; data definition language; data manipulation language; data model; data structure; data type; database consistency; database design; database schema; integrity constraints; specification", subject = "Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management --- Languages (H.2.3): {\bf Data manipulation languages (DML)}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}", } @Article{Denning:1979:TTS, author = "Dorothy E. Denning and Peter J. Denning and Mayer D. 
Schwartz", title = "The Tracker: a Threat to Statistical Database Security", journal = j-TODS, volume = "4", number = "1", pages = "76--96", month = mar, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-1/p76-denning/p76-denning.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-1/p76-denning/", abstract = "The query programs of certain databases report raw statistics for query sets, which are groups of records specified implicitly by a characteristic formula. The raw statistics include query set size and sums of powers of values in the query set. Many users and designers believe that the individual records will remain confidential as long as query programs refuse to report the statistics of query sets which are too small. It is shown that the compromise of small query sets can in fact almost always be accomplished with the help of characteristic formulas called trackers. J. Schl{\"o}rer's individual tracker is reviewed; it is derived from known characteristics of a given individual and permits deducing additional characteristics he may have. The general tracker is introduced: It permits calculating statistics for arbitrary query sets, without requiring preknowledge of anything in the database. General trackers always exist if there are enough distinguishable classes of individuals in the database, in which case the trackers have a simple form. Almost all databases have a general tracker, and general trackers are almost always easy to find. Security is not guaranteed by the lack of a general tracker.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "confidentiality; data base systems; data processing; data security; database security; secure query functions; statistical database; tracker", subject = "Information Systems --- Database Management --- General (H.2.0): {\bf Security, integrity, and protection**}; Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}", } @Article{Dobkin:1979:SDP, author = "David Dobkin and Anita K. Jones and Richard J. Lipton", title = "Secure Databases: Protection Against User Influence", journal = j-TODS, volume = "4", number = "1", pages = "97--106", month = mar, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-1/p97-dobkin/p97-dobkin.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-1/p97-dobkin/", abstract = "Users may be able to compromise databases by asking a series of questions and then inferring new information from the answers. The complexity of protecting a database against this technique is discussed here.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "compromise; data base systems; database; inference; information flow; protection; security; Security TODS, data processing; statistical query", subject = "Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Security, integrity, and protection}", } @Article{Kent:1979:LRB, author = "William Kent", title = "Limitations of Record-Based Information Models", journal = j-TODS, volume = "4", number = "1", pages = "107--131", month = mar, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-1/p107-kent/p107-kent.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-1/p107-kent/", abstract = "Record structures are generally efficient, familiar, and easy to use for most current data processing applications. But they are not complete in their ability to represent information, nor are they fully self-describing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "conceptual model; data model; entities; first normal form; information model; normalization; records; relationships; semantic model", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Normal forms}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Yao:1979:OQE, author = "S. 
Bing Yao", title = "Optimization of Query Evaluation Algorithms", journal = j-TODS, volume = "4", number = "2", pages = "133--155", month = jun, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-2/p133-yao/p133-yao.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-2/p133-yao/", abstract = "A model of database storage and access is presented. The model represents many evaluation algorithms as special cases, and helps to break a complex algorithm into simple access operations. Generalized access cost equations associated with the model are developed and analyzed. Optimization of these cost equations yields an optimal access algorithm which can be synthesized by a query subsystem whose design is based on the modular access operations.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming languages; data base systems; data manipulation language; database optimization; inverted file; query language; query languages; query optimization; relational data model", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Data manipulation languages (DML)}", } @Article{Schwartz:1979:LQS, author = "M. D. Schwartz and D. E. Denning and P. J. 
Denning", title = "Linear Queries in Statistical Databases", journal = j-TODS, volume = "4", number = "2", pages = "156--167", month = jun, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-2/p156-schwartz/p156-schwartz.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-2/p156-schwartz/", abstract = "A database is compromised if a user can determine the data elements associated with keys which he did not know previously. If it is possible, compromise can be achieved by posing a finite set of queries over sets of data elements and employing initial information to solve the resulting system of equations. Assuming the allowable queries are linear, that is, weighted sums of data elements, we show how compromise can be achieved and we characterize the maximal initial information permitted of a user in a secure system. When compromise is possible, the initial information and the number of queries required to achieve it is surprisingly small.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "confidentiality; data base systems; data processing --- security of data; data security; database security; inference; linear query; secure query functions; statistical database", subject = "Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Database Management --- General (H.2.0): {\bf Security, integrity, and protection**}", } @Article{Aho:1979:OPM, author = "Alfred V. 
Aho and Jeffrey D. Ullman", title = "Optimal Partial-Match Retrieval When Fields are Independently Specified", journal = j-TODS, volume = "4", number = "2", pages = "168--179", month = jun, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Graphics/siggraph/79.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-2/p168-aho/p168-aho.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-2/p168-aho/", abstract = "This paper considers the design of a system to answer partial-match queries from a file containing a collection of records, each record consisting of a sequence of fields. A partial-match query is a specification of values for zero or more fields of a record, and the answer to a query is a listing of all records in the file whose fields match the specified values.\par A design is considered in which the file is stored in a set of bins. A formula is derived for the optimal number of bits in a bin address to assign to each field, assuming the probability that a given field is specified in a query is independent of what other fields are specified. Implications of the optimality criterion on the size of bins are also discussed.", acknowledgement = ack-nhfb, classification = "723; 901", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "associative searching; data processing --- file organization; file organization; hashing; information retrieval; information science; partial-match retrieval; searching", oldlabel = "geom-2", subject = "Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Retrieval models}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Search process}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}", } @Article{Thomas:1979:MCA, author = "Robert H. Thomas", title = "A Majority Consensus Approach to Concurrency Control for Multiple Copy Databases", journal = j-TODS, volume = "4", number = "2", pages = "180--209", month = jun, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/fault.tolerant.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-2/p180-thomas/p180-thomas.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-2/p180-thomas/", abstract = "A ``majority consensus'' algorithm which represents a new solution to the update synchronization problem for multiple copy databases is presented. The algorithm embodies distributed control and can function effectively in the presence of communication and database site outages. The correctness of the algorithm is demonstrated and the cost of using it is analyzed. Several examples that illustrate aspects of the algorithm operation are included in the Appendix.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "clock synchronization; computer networks; concurrency control; data base systems; distributed computation; distributed control; distributed databases; multiprocess systems; update synchronization", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}; Information Systems --- Information Storage and Retrieval --- Systems and Software (H.3.4): {\bf Distributed systems}", } @Article{Ries:1979:LGR, author = "Daniel R. Ries and Michael R. Stonebraker", title = "Locking Granularity Revisited", journal = j-TODS, volume = "4", number = "2", pages = "210--227", month = jun, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-2/p210-ries/p210-ries.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-2/p210-ries/", abstract = "Locking granularity refers to the size and hence the number of locks used to ensure the consistency of a database during multiple concurrent updates. In an earlier simulation study we concluded that coarse granularity, such as area or file locking, is to be preferred to fine granularity such as individual page or record locking.\par However, alternate assumptions than those used in the original paper can change that conclusion. First, we modified the assumptions concerning the placement of the locks on the database with respect to the accessing transactions. In the original model the locks were assumed to be well placed. Under worse case and random placement assumptions when only very small transactions access the database, fine granularity is preferable. 
\par Second, we extended the simulation to model a lock hierarchy where large transactions use large locks and small transactions use small locks. In this scenario, again under the random and worse case lock placement assumptions, fine granularity is preferable if all transactions accessing more than 1 percent of the database use large locks.\par Finally, the simulation was extended to model a ``claim as needed'' locking strategy together with the resultant possibility of deadlock. In the original study all locks were claimed in one atomic operation at the beginning of a transaction. The claim as needed strategy does not change the conclusions concerning the desired granularity.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency; database management; locking granularity; locking hierarchies; multiple updates; TODS Ingres, data base systems", subject = "Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Security, integrity, and protection}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}; Information Systems --- Database Management (H.2); Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Deadlock avoidance}", } @Article{Burkhard:1979:PMH, author = "Walter A. 
Burkhard", title = "Partial-Match Hash Coding: Benefits of Redundancy", journal = j-TODS, volume = "4", number = "2", pages = "228--239", month = jun, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; Graphics/siggraph/79.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-2/p228-burkhard/p228-burkhard.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-2/p228-burkhard/", abstract = "File designs suitable for retrieval from a file of $k$-field records when queries may be partially specified are examined. Storage redundancy is introduced to obtain improved worst-case and average-case performances. The resulting storage schemes are appropriate for replicated distributed database environments; it is possible to improve the overall average and worst-case behavior for query response as well as provide an environment with very high reliability. Within practical systems it will be possible to improve the query response time performance as well as reliability over comparable systems without replication.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "access methods; algorithms; analysis; data base systems; data processing --- file organization; data structures; database systems; replication; searching", oldlabel = "geom-100", subject = "Information Systems --- Database Management (H.2); Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Search process}", } @Article{Raghavan:1979:EDR, author = "Vijay V. 
Raghavan and C. T. Yu", title = "Experiments on the Determination of the Relationships Between Terms", journal = j-TODS, volume = "4", number = "2", pages = "240--260", month = jun, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-2/p240-raghavan/p240-raghavan.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-2/p240-raghavan/", abstract = "The retrieval effectiveness of an automatic method that uses relevance judgments for the determination of positive as well as negative relationships between terms is evaluated. The term relationships are incorporated into the retrieval process by using a generalized similarity function that has a term match component, a positive term relationship component, and a negative term relationship component. Two strategies, query partitioning and query clustering, for the evaluation of the effectiveness of the term relationships are investigated. The latter appears to be more attractive from linguistic as well as economic points of view. The positive and the negative relationships are verified to be effective both when used individually, and in combination. The importance attached to the term relationship components relative to that of term match component is found to have a substantial effect on the retrieval performance. The usefulness of discriminant analysis as a technique for determining the relative importance of these components is investigated.", acknowledgement = ack-nhfb, classification = "723; 901", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "antonym; document retrieval; feedback; information science; pseudoclassification; semantics; statistical discrimination; synonym; term associations; thesaurus", subject = "Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3); Information Systems --- Information Storage and Retrieval --- Content Analysis and Indexing (H.3.1): {\bf Thesauruses}; Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}", } @Article{Lipski:1979:SIC, author = "Witold {Lipski, Jr.}", title = "On Semantic Issues Connected with Incomplete Information Databases", journal = j-TODS, volume = "4", number = "3", pages = "262--296", month = sep, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Ai/nonmono.bib; Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-3/p262-lipski/p262-lipski.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-3/p262-lipski/", abstract = "Various approaches to interpreting queries in a database with incomplete information are discussed. A simple model of a database is described, based on attributes which can take values in specified attribute domains. Information incompleteness means that instead of having a single value of an attribute, we have a subset of the attribute domain, which represents our knowledge that the actual value, though unknown, is one of the values in this subset. This extends the idea of Codd's null value, corresponding to the case when this subset is the whole attribute domain. 
A simple query language to communicate with such a system is described and its various semantics are precisely defined. We emphasize the distinction between two different interpretations of the query language--the external one, which refers the queries directly to the real world modeled in an incomplete way by the system, and the internal one, under which the queries refer to the system's information about this world, rather than to the world itself. Both external and internal interpretations are provided with the corresponding sets of axioms which serve as a basis for equivalent transformations of queries. The technique of equivalent transformations of queries is then extensively exploited for evaluating the interpretation of (i.e., the response to) a query.", acknowledgement = ack-nhfb, annote = "Attributes can take values in specified attribute domains. Instead a single value of an attribute, we have a subset of the attribute domain, which represents our knowledge that the actual value, though unknown, is one of the values in this subset. This extends the idea of Codd's null value, corresponding to the case when this subset is the whole attribute domain.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; database; incomplete information; model logic; null values; query language semantics; relational model", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}", } @Article{Aho:1979:TJR, author = "A. V. Aho and C. Beeri and J. D. 
Ullman", title = "The theory of joins in relational databases", journal = j-TODS, volume = "4", number = "3", pages = "297--314", month = sep, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See corrigendum \cite{Ullman:1983:CTJ}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-3/p297-aho/p297-aho.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-3/p297-aho/", abstract = "Answering queries in a relational database often requires that the natural join of two or more relations be computed. However, the result of a join may not be what one expects. In this paper we give efficient algorithms to determine whether the join of several relations has the intuitively expected value (is {\em lossless\/}) and to determine whether a set of relations has a subset with a lossy join. These algorithms assume that all data dependencies are functional. We then discuss the extension of our techniques to the case where data dependencies are multivalued.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; decomposition of database schemes; functional dependencies; lossless join; multivalued dependencies; natural join; projection of dependencies; relational databases", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Models and Principles --- Systems and Information Theory (H.1.1)", } @Article{Fagin:1979:EHF, author = "Ronald Fagin and J{\"u}rg Nievergelt and Nicholas Pippenger and H. 
Raymond Strong", key = "Fagin et al.", title = "Extendible Hashing --- a Fast Access Method for Dynamic Files", journal = j-TODS, volume = "4", number = "3", pages = "315--344", month = sep, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/bin-packing.bib; Misc/is.bib", note = "Also published in/as: IBM Research Report RJ2305, Jul. 1978. See \cite{Regnier:1985:AGF}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-3/p315-fagin/p315-fagin.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-3/p315-fagin/", abstract = "Extendible hashing is a new access technique, in which the user is guaranteed no more than two page faults to locate the data associated with a given unique identifier, or key. Unlike conventional hashing, extendible hashing has a dynamic structure that grows and shrinks gracefully as the database grows and shrinks. This approach simultaneously solves the problem of making hash tables that are extendible and of making radix search trees that are balanced. We study, by analysis and simulation, the performance of extendible hashing. The results indicate that extendible hashing provides an attractive alternative to other access methods, such as balanced trees.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", journalabr = "ACM Trans Database Syst", keywords = "access method; B-tree; data processing; directory; extendible hashing; external hashing; file organization; hashing; index; radix search; searching; trie", remark = "The user is guaranteed no more than two page faults to locate the data associated with a given unique identifier, or key. Extendible hashing has a dynamic structure that grows and shrinks as the database grows and shrinks.", subject = "Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Search process}", } @Article{Lam:1979:PSH, author = "Chat Yu Lam and Stuart E. Madnick", title = "Properties of Storage Hierarchy Systems with Multiple Page Sizes and Redundant Data", journal = j-TODS, volume = "4", number = "3", pages = "345--367", month = sep, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1979-4-3/p345-lam/", abstract = "The need for high performance, highly reliable storage for very large on-line databases, coupled with rapid advances in storage device technology, has made the study of generalized storage hierarchies an important area of research.\par This paper analyzes properties of a data storage hierarchy system specifically designed for handling very large on-line databases. 
To attain high performance and high reliability, the data storage hierarchy makes use of multiple page sizes in different storage levels and maintains multiple copies of the same information across the storage levels. Such a storage hierarchy system is currently being designed as part of the INFOPLEX database computer project. Previous studies of storage hierarchies have primarily focused on virtual memories for program storage and hierarchies with a single page size across all storage levels and/or a single copy of information in the hierarchy.\par In the INFOPLEX design, extensions to the least recently used (LRU) algorithm are used to manage the storage levels. The read-through technique is used to initially load a referenced page of the appropriate size into all storage levels above the one in which the page is found. Since each storage level is viewed as an extension of the immediate higher level, an overflow page from level $i$ is always placed in level $ i + 1 $. Important properties of these algorithms are derived. It is shown that depending on the types of algorithms used and the relative sizes of the storage levels, it is not always possible to guarantee that the contents of a given storage level $i$ is always a superset of the contents of its immediate higher storage level $ i - 1 $. The necessary and sufficient conditions for this property to hold are identified and proved. Furthermore, it is possible that increasing the size of intermediate storage levels may actually increase the number of references to lower storage levels, resulting in reduced performance. Conditions necessary to avoid such an anomaly are also identified and proved.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; data storage hierarchy; database computer; inclusion properties; modeling; performance and reliability analysis; storage management algorithms; very large databases", subject = "Information Systems --- Database Management (H.2); Software --- Operating Systems --- Storage Management (D.4.2): {\bf Storage hierarchies}", } @Article{Buneman:1979:EMR, author = "O. Peter Buneman and Eric K. Clemons", title = "Efficiently Monitoring Relational Databases", journal = j-TODS, volume = "4", number = "3", pages = "368--382", month = sep, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: Working paper, 76-10-08, Dep. Decision Sciences, The Wharton School, Un. Penn, PA, Jun. 1977.", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-3/p368-buneman/p368-buneman.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-3/p368-buneman/", abstract = "An alerter is a program which monitors a database and reports to some user or program when a specified condition occurs. It may be that the condition is a complicated expression involving several entities in the database; in this case the evaluation of the expression may be computationally expensive. A scheme is presented in which alerters may be placed on a complex query involving a relational database, and a method is demonstrated for reducing the amount of computation involved in checking whether an alerter should be triggered.", acknowledgement = ack-nhfb, annote = "An alerter monitors a database and reports when a specific condition occurs.
Alerters may be placed on a query, a method is demonstrated for reducing the amount of computation involved in checking whether an alerter should be triggered. Recomputation of derived data with pruning, viz. identity connection.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "alerters; data base systems; exception reporting; integrity constraints; programming techniques; relational databases", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Comer:1979:HTI, author = "Douglas Comer", title = "Heuristics For Trie Index Minimization", journal = j-TODS, volume = "4", number = "3", pages = "383--395", month = sep, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-3/p383-comer/p383-comer.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-3/p383-comer/", abstract = "A trie is a digital search tree in which leaves correspond to records in a file. Searching proceeds from the root to a leaf, where the edge taken at each node depends on the value of an attribute in the query. Trie implementations have the advantage of being fast, but the disadvantage of achieving that speed at great expense in storage space. Of primary concern in making a trie practical, therefore, is the problem of minimizing storage requirements. One method for reducing the space required is to reorder attribute testing. Unfortunately, the problem of finding an ordering which guarantees a minimum-size trie is NP-complete. 
In this paper we investigate several heuristics for reordering attributes, and derive bounds on the sizes of the worst tries produced by them in terms of the underlying file. Although the analysis is presented for a binary file, extensions to files of higher degree are shown.\par Another alternative for reducing the space required by a trie is an implementation, called an $ \Omega $-trie, in which the order of attribute testing is contained in the trie itself. We show that for most applications, $ \Omega $-tries are smaller than other implementations of tries, even when heuristics for improving storage requirements are employed.", acknowledgement = ack-nhfb, annote = "Of primary concern in making a trie practical is the problem of minimizing storage requirements. One method for reducing the space is attribute testing which is NP-complete. Another alternative is an $ \Omega $-trie, in which the order of attribute testing is contained in the trie itself. $ \Omega $-tries are smaller than other implementations of tries.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing; doubly chained tree; index; trie; trie minimization", subject = "Data --- Data Structures (E.1); Information Systems --- Database Management (H.2)", } @Article{Codd:1979:EDR, author = "E. F. 
Codd", title = "Extending the Database Relational Model to Capture More Meaning", journal = j-TODS, volume = "4", number = "4", pages = "397--434", month = dec, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Ai/nonmono.bib; Compendex database; Compiler/prog.lang.theory.bib; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", note = "Reprinted in \cite[pp.~457--475]{Stonebraker:1988:RDS}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-4/p397-codd/p397-codd.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-4/p397-codd/", abstract = "During the last three or four years several investigators have been exploring ``semantic models'' for formatted databases. The intent is to capture (in a more or less formal way) more of the meaning of the data so that database design can become more systematic and the database system itself can behave more intelligently. Two major thrusts are clear.\par (1) the search for meaningful units that are as small as possible-- {\em atomic semantics\/};\par (2) the search for meaningful units that are larger than the usual $n$-ary relation-- {\em molecular semantics}.\par In this paper we propose extensions to the relational model to support certain atomic and molecular semantics. These extensions represent a synthesis of many ideas from the published work in semantic modeling plus the introduction of new rules for insertion, update, and deletion, as well as new algebraic operators.", acknowledgement = ack-nhfb, acmcrnumber = "8905-0330", annote = "``Semantic models'' for formatted databases, to capture in a more or less formal way more of the meaning of the data. Two major thrusts: relation and molecular semantics. Extensions to the relational model (RM/T). 
New rules for insertion, update, and deletion, as well as new algebraic operators (Theta-select, outer join,\ldots{}.).", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "conceptual model; conceptual schema; data base systems; data model; data semantics; database; database schema; entity model; knowledge base; knowledge representation; relation; relational database; relational model; relational schema; semantic model", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}", } @Article{Aho:1979:EOC, author = "A. V. Aho and Y. Sagiv and J. D. Ullman", title = "Efficient Optimization of a Class of Relational Expressions", journal = j-TODS, volume = "4", number = "4", pages = "435--454", month = dec, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-4/p435-aho/p435-aho.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-4/p435-aho/", abstract = "The design of several database query languages has been influenced by Codd's relational algebra. This paper discusses the difficulty of optimizing queries based on the relational algebra operations select, project, and join. A matrix, called a tableau, is proposed as a useful device for representing the value of a query, and optimization of queries is couched in terms of finding a minimal tableau equivalent to a given one. 
Functional dependencies can be used to imply additional equivalences among tableaux. Although the optimization problem is NP-complete, a polynomial time algorithm exists to optimize tableaux that correspond to an important subclass of queries.", acknowledgement = ack-nhfb, annote = "Optimizing queries based on select, project, and join.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems, TODS tableaux optimization; equivalence of queries; NP-completeness; query optimization; relational algebra; relational database; tableaux", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Maier:1979:TID, author = "David Maier and Alberto O. Mendelzon and Yehoshua Sagiv", title = "Testing Implications of Data Dependencies", journal = j-TODS, volume = "4", number = "4", pages = "455--469", month = dec, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-4/p455-maier/p455-maier.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-4/p455-maier/", abstract = "Presented is a computation method --- the {\em chase\/} --- for testing implication of data dependencies by a set of data dependencies. The chase operates on tableaux similar to those of Aho, Sagiv, and Ullman. The chase includes previous tableau computation methods as special cases. 
By interpreting tableaux alternately as mappings or as templates for relations, it is possible to test implication of join dependencies (including multivalued dependencies) and functional dependencies by a set of dependencies.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "chase; data base systems; data dependencies; functional dependencies; join dependencies; multivalued dependencies; relational databases; tableaux", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Housel:1979:PTI, author = "Barron C. Housel", title = "Pipelining: a Technique for Implementing Data Restructurers", journal = j-TODS, volume = "4", number = "4", pages = "470--492", month = dec, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-4/p470-housel/p470-housel.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-4/p470-housel/", abstract = "In the past several years much attention has been given to the problem of data translation. The focus has been mainly on methodologies and specification languages for accomplishing this task. Recently, several prototype systems have emerged, and now the issues of implementation and performance must be addressed. In general, a data restructuring specification may contain multiple source and target files. This specification can be viewed as a ``process graph'' which is a network of restructuring operations subject to precedence constraints. 
One technique used to achieve good performance is that of pipelining data in the process graph.\par In this paper we address a number of issues pertinent to a pipelining architecture. Specifically, we give algorithms for resolving deadlock situations which can arise, and partitioning the process graph to achieve an optimal schedule for executing the restructuring steps. In addition, we discuss how pipelining has influenced the design of the restructuring operations and the file structures used in an actual system.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing; data translation; database conversion; deadlock; pipelining; process scheduling", subject = "Information Systems --- Database Management --- Heterogeneous Databases (H.2.5): {\bf Data translation**}; Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Deadlock avoidance}", } @Article{Shopiro:1979:TPL, author = "Jonathan E. Shopiro", title = "{Theseus} --- {A} Programming Language for Relational Databases", journal = j-TODS, volume = "4", number = "4", pages = "493--517", month = dec, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-4/p493-shopiro/p493-shopiro.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-4/p493-shopiro/", abstract = "Theseus, a very high-level programming language extending EUCLID, is described. Data objects in Theseus include relations and a-sets, a generalization of records. 
The primary design goals of Theseus are to facilitate the writing of well-structured programs for database applications and to serve as a vehicle for research in automatic program optimization.", acknowledgement = ack-nhfb, annote = "Extending EUCLID. Data objects in Theseus include relations and a-sets", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "compiler organization; computer programming languages; data base systems; relational database languages; very high-level languages", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Database (persistent) programming languages}", } @Article{Yamamoto:1979:DBM, author = "Sumiyasu Yamamoto and Shinsei Tazawa and Kazuhiko Ushio and Hideto Ikeda", title = "Design of a Balanced Multiple-Valued File-Organization Scheme with the Least Redundancy", journal = j-TODS, volume = "4", number = "4", pages = "518--530", month = dec, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-4/p518-yamamoto/p518-yamamoto.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-4/p518-yamamoto/", abstract = "A new balanced file-organization scheme of order two for multiple-valued records is presented. This scheme is called HUBMFS 2 (Hiroshima University Balanced Multiple-valued File-organization Scheme of order two). 
It is assumed that records are characterized by $m$ attributes having $n$ possible values each, and the query set consists of queries which specify values of two attributes. It is shown that the redundancy of the bucket (the probability of storing a record in the bucket) is minimized if and only if the structure of the bucket is a partite-claw. A necessary and sufficient condition for the existence of an HUBMFS 2, which is composed exclusively of partite-claw buckets, is given. A construction algorithm is also given. The proposed HUBMFS 2 is superior to existing BMFS 2 (Balanced Multiple-valued File-organization Schemes of order two) in that it has the least redundancy among all possible BMFS 2 's having the same parameters and that it can be constructed for a less restrictive set of parameters.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "balanced filing scheme; bucket; claw; data processing; file organization; graph decomposition; information retrieval; information storage; inverted file; multipartite graph; multiple-valued attributes; redundancy; secondary index", subject = "Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2)", } @Article{Batory:1979:STF, author = "Don S. 
Batory", title = "On Searching Transposed Files", journal = j-TODS, volume = "4", number = "4", pages = "531--544", month = dec, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-4/p531-batory/p531-batory.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-4/p531-batory/", abstract = "A transposed file is a collection of nonsequential files called subfiles. Each subfile contains selected attribute data for all records. It is shown that transposed file performance can be enhanced by using a proper strategy to process queries. Analytic cost expressions for processing conjunctive, disjunctive, and batched queries are developed and an effective heuristic for minimizing query processing costs is presented. Formulations of the problem of optimally processing queries for a particular family of transposed files are shown to be NP-complete. Query processing performance comparisons of multilist, inverted, and nonsequential files with transposed files are also considered.", acknowledgement = ack-nhfb, annote = "Analytic cost expressions for processing conjunctive, disjunctive, and batch queries are developed and an effective heuristic for minimizing query processing costs is presented.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; file searching; inverted file; multilist; NP-complete; query processing; transposed file", subject = "Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Search process}", } @Article{Bernstein:1979:CPS, author = "Philip A. Bernstein and Marco A. Casanova and Nathan Goodman", title = "Comments on {``Process Synchronization in Database Systems''}", journal = j-TODS, volume = "4", number = "4", pages = "545--546", month = dec, year = "1979", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See \cite{Schlageter:1978:PSD}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1979-4-4/p545-bernstein/p545-bernstein.pdf; http://www.acm.org/pubs/citations/journals/tods/1979-4-4/p545-bernstein/", acknowledgement = ack-nhfb, annote = "The results of Schlageter are in error.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", subject = "Information Systems --- Database Management (H.2)", } @Article{Rothnie:1980:ISD, author = "James B. {Rothnie, Jr.} and Philip A. Bernstein and S. Fox and N. Goodman and M. Hammer and T. A. Landers and C. Reeve and David W. Shipman and E. 
Wong", title = "Introduction to a System for Distributed Databases ({SDD-1})", journal = j-TODS, volume = "5", number = "1", pages = "1--17", month = mar, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/fault.tolerant.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-1/p1-rothnie/p1-rothnie.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-1/p1-rothnie/", abstract = "The declining cost of computer hardware and the increasing data processing needs of geographically dispersed organizations have led to substantial interest in distributed data management. SDD-1 is a distributed database management system currently being developed by Computer Corporation of America. Users interact with SDD-1 precisely as if it were a nondistributed database system because SDD-1 handles all issues arising from the distribution of data. These issues include distributed concurrency control, distributed query processing, resiliency to component failure, and distributed directory management. This paper presents an overview of the SDD-1 design and its solutions to the above problems.\par This paper is the first of a series of companion papers on SDD-1 (Bernstein and Shipman [2], Bernstein et al. [4], and Hammer and Shipman [14]).", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; data base systems; database reliability; distributed database system; query processing; relational data model", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Bernstein:1980:CCS, author = "Philip A. Bernstein and David W. Shipman and James B. {Rothnie, Jr.}", title = "Concurrency Control in a System for Distributed Databases ({SDD-1})", journal = j-TODS, volume = "5", number = "1", pages = "18--51", month = mar, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; Distributed/fault.tolerant.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-1/p18-bernstein/p18-bernstein.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-1/p18-bernstein/", abstract = "This paper presents the concurrency control strategy of SDD-1. SDD-1, a System for Distributed Databases, is a prototype distributed database system being developed by Computer Corporation of America. In SDD-1, portions of data distributed throughout a network may be replicated at multiple sites. 
The SDD-1 concurrency control guarantees database consistency in the face of such distribution and replication.\par This paper is one of a series of companion papers on SDD-1 [4, 10, 12, 21].", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; conflict graph; data base systems; distributed database system; serializability; synchronization; timestamps", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}", } @Article{Bernstein:1980:CCC, author = "Philip A. Bernstein and David W. Shipman", title = "Correctness of Concurrency Control Mechanisms in a System for Distributed Databases ({SDD-1})", journal = j-TODS, volume = "5", number = "1", pages = "52--68", month = mar, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-1/p52-bernstein/p52-bernstein.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-1/p52-bernstein/", abstract = "This paper presents a formal analysis of the concurrency control strategy of SDD-1. SDD-1, a System for Distributed Databases, is a prototype distributed database system being developed by Computer Corporation of America. In SDD-1, portions of data distributed throughout a network may be replicated at multiple sites. 
The SDD-1 concurrency control guarantees database consistency in the face of such distribution and replication.\par This paper is one of a series of companion papers on SDD-1 [2, 8].", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "conflict graph; correctness of concurrency control; data base systems; distributed database system; serializability theory", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}", } @Article{Gopalakrishna:1980:PEA, author = "V. Gopalakrishna and C. E. {Veni Madhavan}", title = "Performance Evaluation of Attribute-Based Tree Organization", journal = j-TODS, volume = "5", number = "1", pages = "69--87", month = mar, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-1/p69-gopalakrishna/p69-gopalakrishna.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-1/p69-gopalakrishna/", abstract = "A modified version of the multiple attribute tree (MAT) database organization, which uses a compact directory, is discussed. An efficient algorithm to process the directory for carrying out the node searches is presented. Statistical procedures are developed to estimate the number of nodes searched and the number of data blocks retrieved for most general and complex queries. The performance of inverted file and modified MAT organizations are compared using six real-life databases and four types of query complexities. 
Careful tradeoffs are established in terms of storage and access times for directory and data, query complexities, and database characteristics.", acknowledgement = ack-nhfb, annote = "A version of the multiple attribute tree (MAT) database organization. Statistical procedures are developed to estimate the number of nodes searched and the number of data blocks retrieved. The performance of inverted file and modified MAT organizations are compared.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "access time; average retrieval time per query; data base systems; database organization; database performance; directory search time; modified multiple attribute tree; query complexity; secondary index organization", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3)", } @Article{Denning:1980:FPF, author = "Dorothy E. Denning and Jan Schl{\"o}rer", title = "Fast Procedure for Finding a Tracker in a Statistical Database", journal = j-TODS, volume = "5", number = "1", pages = "88--102", month = mar, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-1/p88-denning/p88-denning.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-1/p88-denning/", abstract = "To avoid trivial compromises, most on-line statistical databases refuse to answer queries for statistics about small subgroups. 
Previous research discovered a powerful snooping tool, the tracker, with which the answers to these unanswerable queries are easily calculated. However, the extent of this threat was not clear, for no one had shown that finding a tracker is guaranteed to be easy.\par This paper gives a simple algorithm for finding a tracker when the maximum number of identical records is not too large. The number of queries required to find a tracker is at most {$ O(\log_2 S) $} queries, where {$S$} is the number of distinct records possible. Experimental results show that the procedure often finds a tracker with just a few queries. The threat posed by trackers is therefore considerable.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "confidentiality; data base systems; data security; database security; statistical database; tracker", subject = "Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}; Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Security, integrity, and protection}", } @Article{Menasce:1980:LPR, author = "Daniel A. Menasc{\'e} and Gerald J. Popek and Richard R. 
Muntz", title = "A Locking Protocol for Resource Coordination in Distributed Databases", journal = j-TODS, volume = "5", number = "2", pages = "103--138", month = jun, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; Distributed/fault.tolerant.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-2/p103-menasce/p103-menasce.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-2/p103-menasce/", abstract = "A locking protocol to coordinate access to a distributed database and to maintain system consistency throughout normal and abnormal conditions is presented. The proposed protocol is robust in the face of crashes of any participating site, as well as communication failures. Recovery from any number of failures during normal operation or any of the recovery stages is supported. Recovery is done in such a way that maximum forward progress is achieved by the recovery procedures. Integration of virtually any locking discipline including predicate lock methods is permitted by this protocol. The locking algorithm operates, and operates correctly, when the network is partitioned, either intentionally or by failure of communication lines. Each partition is able to continue with work local to it, and operation merges gracefully when the partitions are reconnected.\par A subroutine of the protocol, that assures reliable communication among sites, is shown to have better performance than two-phase commit methods. For many topologies of interest, the delay introduced by the overall protocol is not a direct function of the size of the network. The communications cost is shown to grow in a relatively slow, linear fashion with the number of sites participating in the transaction. 
An informal proof of the correctness of the algorithm is also presented in this paper.\par The algorithm has as its core a centralized locking protocol with distributed recovery procedures. A centralized controller with local appendages at each site coordinates all resource control, with requests initiated by application programs at any site. However, no site experiences undue load. Recovery is broken down into three disjoint mechanisms: for single node recovery, merge of partitions, and reconstruction of the centralized controller and tables. The disjointness of the mechanisms contributes to comprehensibility and ease of proof.\par The paper concludes with a proposal for an extension aimed at optimizing operation of the algorithm to adapt to highly skewed distributions of activity. The extension applies nicely to interconnected computer networks.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency; consistency; crash recovery; distributed databases; locking protocol", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}", } @Article{Bayer:1980:PRD, author = "R. Bayer and H. Heller and A. 
Reiser", title = "Parallelism and Recovery in Database Systems", journal = j-TODS, volume = "5", number = "2", pages = "139--156", month = jun, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-2/p139-bayer/p139-bayer.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-2/p139-bayer/", abstract = "In this paper a new method to increase parallelism in database systems is described. Use is made of the fact that for recovery reasons, we often have two values for one object in the database--the new one and the old one. Introduced and discussed in detail is a certain scheme by which readers and writers may work simultaneously on the same object. It is proved that transactions executed according to this scheme have the correct effect; i.e., consistency is preserved. Several variations of the basic scheme which are suitable depending on the degree of parallelism required, are described.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency; consistency; data base systems; deadlock; integrity; recovery; synchronization; transaction; two phase locking", subject = "Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Recovery and restart}; Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Deadlock avoidance}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}", } @Article{Navathe:1980:SAD, author = "Shamkant B. 
Navathe", title = "Schema Analysis for Database Restructuring", journal = j-TODS, volume = "5", number = "2", pages = "157--184", month = jun, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: Proceedings of the Third Conference on Very Large Databases, Morgan Kaufmann pubs. (Los Altos CA), 1977.", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-2/p157-navathe/p157-navathe.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-2/p157-navathe/", abstract = "The problem of generalized restructuring of databases has been addressed with two limitations: first, it is assumed that the restructuring user is able to describe the source and target databases in terms of the implicit data model of a particular methodology; second, the restructuring user is faced with the task of judging the scope and applicability of the defined types of restructuring to his database implementation and then of actually specifying his restructuring needs by translating them into the restructuring operations on a foreign data model. A certain amount of analysis of the logical and physical structure of databases must be performed, and the basic ingredients for such an analysis are developed here. The distinction between hierarchical and nonhierarchical data relationships is discussed, and a classification for database schemata is proposed. Examples are given to illustrate how these schemata arise in the conventional hierarchical and network systems. Application of the schema analysis methodology to restructuring specification is also discussed. 
An example is presented to illustrate the different implications of restructuring three seemingly identical database structures.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; data model; data relationships; data semantics; data structure; database; database design; database management systems; database restructuring; graphical representation of data; schema; stored data", subject = "Information Systems --- Database Management (H.2); Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Mylopoulos:1980:LFD, author = "John Mylopoulos and Philip A. Bernstein and Harry K. T. Wong", title = "A Language Facility for Designing Database-Intensive Applications", journal = j-TODS, volume = "5", number = "2", pages = "185--207", month = jun, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib; Object/Nierstrasz.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-2/p185-mylopoulos/p185-mylopoulos.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-2/p185-mylopoulos/", abstract = "TAXIS, a language for the design of interactive information systems (e.g., credit card verification, student-course registration, and airline reservations) is described. 
TAXIS offers (relational) database management facilities, a means of specifying semantic integrity constraints, and an exception-handling mechanism, integrated into a single language through the concepts of {\em class, property}, and the {\em IS-A\/} (generalization) {\em relationship}. A description of the main constructs of TAXIS is included and their usefulness illustrated with examples.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "abstract data type; applications programming; exception handling; information system; relational data model; semantic network", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Languages (H.2.3)", } @Article{Lozinskii:1980:CRR, author = "Eliezer L. Lozinskii", title = "Construction of Relations in Relational Databases", journal = j-TODS, volume = "5", number = "2", pages = "208--224", month = jun, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-2/p208-lozinskii/p208-lozinskii.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-2/p208-lozinskii/", abstract = "Using a nonprocedural language for query formulation requires certain automatization of a query answering process. Given a query for creation of a new relation, the problem is to find an efficient procedure which produces this relation from a given relational database. 
The author concentrates upon sequences of join operations which losslessly produce a relation required by a query. A new property of such sequences is analyzed which provides a basis for the presented algorithms that construct an efficient join procedure. The algorithms have polynomial complexity. A modified AND\slash OR graph is used for the display of a given set of dependencies and a collection of relations representing a database.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; AND/OR graphs; data base systems; lossless joins; query answering; relational databases", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Stonebraker:1980:RDS, author = "Michael Stonebraker", title = "Retrospection on a Database System", journal = j-TODS, volume = "5", number = "2", pages = "225--240", month = jun, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Reprinted in \cite{Stonebraker:1988:RDS}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-2/p225-stonebraker/p225-stonebraker.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-2/p225-stonebraker/", abstract = "This paper describes the implementation history of the INGRES database system. It focuses on mistakes that were made in progress rather than on eventual corrections. Some attention is also given to the role of structured design in a database system implementation and to the problem of supporting nontrivial users. 
Lastly, miscellaneous impressions of UNIX, the PDP-11, and data models are given.", acknowledgement = ack-nhfb, annote = "The implementation history of the INGRES database system. The role of structured design in a database system implementation, impressions of UNIX, the PDP-11, and data models are given.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency; data base systems; history evaluation; Ingres; TODS; integrity; nonprocedural languages; protection; recovery; relational databases", subject = "Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Recovery and restart}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}", } @Article{Beeri:1980:MPF, author = "Catriel Beeri", title = "On the Membership Problem for Functional and Multivalued Dependencies in Relational Databases", journal = j-TODS, volume = "5", number = "3", pages = "241--259", month = sep, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-3/p241-beeri/p241-beeri.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-3/p241-beeri/", abstract = "The problem of whether a given dependency in a database relation can be derived from a given set of dependencies is investigated. 
We show that the problem can be decided in polynomial time when the given set consists of either multivalued dependencies only or of both functional and multivalued dependencies and the given dependency is also either a functional or a multivalued dependency. These results hold when the derivations are restricted not to use the complementation rule.", acknowledgement = ack-nhfb, annote = "The problem of whether a given dependency in a database relation can be derived from a given set of dependencies is investigated. We show that the problem can be decided in polynomial time when the given set consists of either multivalued dependencies only or of both functional and multivalued dependencies and the given dependency is also either a functional or a multivalued dependency. These results hold when the derivations are restricted not to use the complementation rule.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; functional dependency; inference rule; membership; multivalued dependency; relations", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Klug:1980:CCR, author = "A. 
Klug", title = "Calculating Constraints on Relational Expressions", journal = j-TODS, volume = "5", number = "3", pages = "260--290", month = sep, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-3/p260-klug/p260-klug.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-3/p260-klug/", abstract = "This paper deals with the problem of determining which of a certain class of constraints hold on a given relational algebra expression where the base relations come from a given schema. The class of constraints includes functional dependencies, equality of domains, and constancy of domains. The relational algebra consists of projection, selection, restriction, cross product, union, and difference. The problem as given is undecidable, but if set difference is removed from the algebra, there is a solution. Operators specifying a closure function (similar to functional dependency closure on one relation) are defined; these will generate exactly the set of constraints valid on the given relational algebra expression. We prove that the operators are sound and complete.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "completeness; constraints; data base systems; derivation rules; functional dependencies; Views", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Denning:1980:SSD, author = "Dorothy E. 
Denning", title = "Secure Statistical Databases with Random Sample Queries", journal = j-TODS, volume = "5", number = "3", pages = "291--315", month = sep, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-3/p291-denning/p291-denning.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-3/p291-denning/", abstract = "A new inference control, called random sample queries, is proposed for safeguarding confidential data in on-line statistical databases. The random sample queries control deals directly with the basic principle of compromise by making it impossible for a questioner to control precisely the formation of query sets. Queries for relative frequencies and averages are computed using random samples drawn from the query sets. The sampling strategy permits the release of accurate and timely statistics and can be implemented at very low cost. Analysis shows the relative error in the statistics decreases as the query set size increases; in contrast, the effort required to compromise increases with the query set size due to large absolute errors. Experiments performed on a simulated database support the analysis.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "confidentiality; data base systems; database security; disclosure controls; sampling; statistical database", subject = "Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Security, integrity, and protection}", } @Article{Beck:1980:SMS, author = "Leland L. Beck", title = "A security mechanism for statistical database", journal = j-TODS, volume = "5", number = "3", pages = "316--338", month = sep, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-3/p316-beck/p316-beck.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-3/p316-beck/", abstract = "The problem of user inference in statistical databases is discussed and illustrated with several examples. It is assumed that the database allows ``total'', ``average'', ``count'', and ``percentile'' queries; a query may refer to any arbitrary subset of the database. Methods for protecting the security of such a database are considered; it is shown that any scheme which gives ``statistically correct'' answers is vulnerable to penetration. A precise definition of compromisability (in a statistical sense) is given. A general model of user inference is proposed; two special cases of this model appear to contain all previously published strategies for compromising a statistical database. 
A method for protecting the security of such a statistical database against these types of user inference is presented and discussed. It is shown that the number of queries required to compromise the database can be made arbitrarily large by accepting moderate increases in the variance of responses to queries. A numerical example is presented to illustrate the application of the techniques discussed.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "compromisability; data base systems; data security; database inference; privacy protection; statistical databases; statistical queries", subject = "Information Systems --- Database Management --- General (H.2.0): {\bf Security, integrity, and protection**}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}", } @Article{Lee:1980:QTF, author = "D. T. Lee and C. K. Wong", title = "Quintary Trees: a File Structure for Multidimensional Database Systems", journal = j-TODS, volume = "5", number = "3", pages = "339--353", month = sep, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; Graphics/siggraph/80.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-3/p339-lee/p339-lee.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-3/p339-lee/", abstract = "A file structure is presented that was designed for a database system in which four types of retrieval requests (queries) are allowed: exact match, partial match, range, and partial range queries. 
Outlines are sketched for inserting and deleting records that require $O(k + (\log N)^k)$ time, on the average. This structure achieves faster response time than previously known structures (for many of the queries) at the cost of extra storage.", acknowledgement = ack-nhfb, annote = "Four types of retrieval (queries) are allowed: exact match, partial match, range, and partial range queries. Faster response time at the cost of extra storage.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; data processing --- data structures; database system; exact match queries; file maintenance; information retrieval; key; multidimensional space; queries; range search; search", subject = "Data --- Data Structures (E.1): {\bf Trees}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3)", } @Article{Kung:1980:CMB, author = "H. T. Kung and Philip L. Lehman", title = "Concurrent Manipulation of Binary Search Trees", journal = j-TODS, volume = "5", number = "3", pages = "354--382", month = sep, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-3/p354-kung/p354-kung.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-3/p354-kung/", abstract = "The concurrent manipulation of a binary search tree is considered in this paper.
The systems presented can support any number of concurrent processes which perform searching, insertion, deletion, and rotation (reorganization) on the tree, but allow any process to lock only a constant number of nodes at any time. Also, in the systems, searches are essentially never blocked. The concurrency control techniques introduced in the paper include the use of special nodes and pointers to redirect searches, and the use of copies of sections of the tree to introduce many changes simultaneously and therefore avoid unpredictable interleaving. Methods developed in this paper may provide new insights into other problems in the area of concurrent database manipulation.", acknowledgement = ack-nhfb, annote = "Operations on tries are defined so that concurrency of access is possible while the number of locked nodes is minimal.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "binary search trees; concurrency controls; concurrent algorithm; consistency; correctness; data processing; data structures; databases; locking protocols", subject = "Data --- Data Structures (E.1): {\bf Trees}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}", } @Article{Denning:1980:CLQ, author = "D. E. Denning", title = "Corrigenda on Linear Queries in Statistical Databases", journal = j-TODS, volume = "5", number = "3", pages = "383--383", month = sep, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibsource = "Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", annote = "refers to Schwartz 1979 TODS.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Hsiao:1980:TFT, author = "David K. 
Hsiao", title = "{TODS} --- the first three years {(1976--1978)}", journal = j-TODS, volume = "5", number = "4", pages = "385--403", month = dec, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-4/p385-hsiao/p385-hsiao.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-4/p385-hsiao/", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", subject = "General Literature --- General (A.0)", } @Article{Armstrong:1980:DFD, author = "W. W. Armstrong and C. Delobel", title = "Decompositions and Functional Dependencies in Relations", journal = j-TODS, volume = "5", number = "4", pages = "404--430", month = dec, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-4/p404-armstrong/p404-armstrong.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-4/p404-armstrong/", abstract = "A general study is made of two basic integrity constraints on relations: functional and multivalued dependencies. The latter are studied via an equivalent concept: decompositions. A model is constructed for any possible combination of functional dependencies and decompositions. The model embodies some decompositions as unions of relations having different schemata of functional dependencies. This suggests a new, stronger integrity constraint, the degenerate decomposition.
More generally, the theory demonstrates the importance of using the union operation in database design and of allowing different schemata on the operands of a union. Techniques based on the union lead to a method for solving the problem of membership of a decomposition in the closure of a given set of functional dependencies and decompositions. The concept of antiroot is introduced as a tool for describing families of decompositions, and its fundamental importance for database design is indicated.", acknowledgement = ack-nhfb, annote = "A general study is made of two basic integrity constraints, functional and multivalued dependencies, via an equivalent concept: decompositions.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; decomposition; functional dependency; integrity constraint; multivalued dependency; relational database", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Hammer:1980:RMS, author = "Michael Hammer and David Shipman", title = "Reliability Mechanisms for {SDD-1}: a System for Distributed Databases", journal = j-TODS, volume = "5", number = "4", pages = "431--466", month = dec, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Distributed/fault.tolerant.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-4/p431-hammer/p431-hammer.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-4/p431-hammer/", abstract = "This paper presents the reliability mechanisms of SDD-1, a prototype distributed database system being developed by the Computer Corporation of America.
Reliability algorithms in SDD-1 center around the concept of the Reliable Network (RelNet). The RelNet is a communications medium incorporating facilities for site status monitoring, event timestamping, multiply buffered message delivery, and the atomic control of distributed transactions.\par This paper is one of a series of companion papers on SDD-1 [3, 4, 6, 13].", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "atomicity; data base systems; distributed databases; recovery; reliability", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Recovery and restart}", } @Article{Schloer:1980:SSD, author = "Jan Schl{\"o}rer", title = "Disclosure from Statistical Databases: Quantitative Aspects of Trackers", journal = j-TODS, volume = "5", number = "4", pages = "467--492", month = dec, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1980-5-4/p467-schler/", abstract = "Statistical evaluation of databases which contain personal records may entail risks for the confidentiality of the individual records. The risk has increased with the availability of flexible interactive evaluation programs which permit the use of trackers, the most dangerous class of snooping tools known. A class of trackers, called union trackers, is described.
They permit reconstruction of the entire database without supplementary knowledge and include the general tracker recently described as a special case. For many real statistical databases the overwhelming majority of definable sets of records will form trackers. For such databases a random search for a tracker is likely to succeed rapidly. Individual trackers are redefined and counted and their cardinalities are investigated. If there are $n$ records in the database, then most individual trackers employ innocent cardinalities near $ n / 3 $, making them difficult to detect. Disclosure with trackers usually requires little effort per retrieved data element.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "confidentiality; database security; security; statistical database; tracker", subject = "Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}; Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Security, integrity, and protection}", xxtitle = "Disclosure from Statistical Databases: Quantitative Aspects of Trackers", } @Article{Herot:1980:SMD, author = "Christopher F. 
Herot", title = "Spatial Management of Data", journal = j-TODS, volume = "5", number = "4", pages = "493--513", month = dec, year = "1980", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/gesturing.bib; Graphics/imager/imager.80.bib; Graphics/siggraph/80.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1980-5-4/p493-herot/p493-herot.pdf; http://www.acm.org/pubs/citations/journals/tods/1980-5-4/p493-herot/", abstract = "Spatial data management is a technique for organizing and retrieving information by positioning it in a graphical data space (GDS). This graphical data space is viewed through a color raster-scan display which enables users to traverse the GDS surface or zoom into the image to obtain greater detail. In contrast to conventional database management systems, in which users access data by asking questions in a formal query language, a spatial data management system (SDMS) presents the information graphically in a form that seems to encourage browsing and to require less prior knowledge of the contents and organization of the database.\par This paper presents an overview of the SDMS concept and describes its implementation in a prototype system for retrieving information from both a symbolic database management system and an optical videodisk.", acknowledgement = ack-nhfb, annote = "Organizing and retrieving information by positioning it in a graphical data space viewed through a color display. An overview of the SDMS concept and describes its implementation in a prototype system.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer graphics; database query languages; graphical/programming language, query language, Man-Machine Communications interaction, data base systems; graphics languages; man-machine interaction", subject = "Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Spatial databases and GIS}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Zaniolo:1981:DRD, author = "Carlo Zaniolo and Michel A. Melkanoff", title = "On the Design of Relational Database Schemata", journal = j-TODS, volume = "6", number = "1", pages = "1--47", month = mar, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68B15 (68H05)", MRnumber = "82b:68019", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-1/p1-zaniolo/p1-zaniolo.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-1/p1-zaniolo/", abstract = "The purpose of this paper is to present a new approach to the conceptual design of relational databases based on the complete relatability conditions (CRCs).\par It is shown that current database design methodology based upon the elimination of anomalies is not adequate. In contradistinction, the CRCs are shown to provide a powerful criterion for decomposition.
A decomposition algorithm is presented which (1) permits decomposition of complex relations into simple, well-defined primitives, (2) preserves all the original information, and (3) minimizes redundancy.\par The paper gives a complete derivation of the CRCs, beginning with a unified treatment of functional and multivalued dependencies, and introduces the concept of elementary functional dependencies and multiple elementary multivalued dependencies. Admissibility of covers and validation of results are also discussed, and it is shown how these concepts may be used to improve the design of 3NF schemata. Finally, a convenient graphical representation is proposed, and several examples are described in detail to illustrate the method.", acknowledgement = ack-nhfb, annote = "The conceptual design of relational databases based on the complete relatability conditions (CRCs). A unified treatment of functional and multivalued dependencies.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; decomposition; functional dependencies; minimal covers; multivalued dependencies; relational databases; schema design", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}", } @Article{Lien:1981:HSR, author = "Y.
Edmund Lien", title = "Hierarchical Schemata for Relational Databases", journal = j-TODS, volume = "6", number = "1", pages = "48--69", month = mar, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68B15 (68H05)", MRnumber = "82b:68015", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-1/p48-lien/p48-lien.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-1/p48-lien/", abstract = "Most database design methods for the relational model produce a flat database, that is, a family of relations with no explicit interrelational connections. The user of a flat database is likely to be unaware of certain interrelational semantics. In contrast, the entity-relationship model provides schema graphs as a description of the database, as well as for navigating the database. Nevertheless, the user of an entity-relationship database may still commit semantic errors, such as performing a lossy join. This paper proposes a nonflat, or hierarchical, view of relational databases. Relations are grouped together to form {\em relation hierarchies\/} in which lossless joins are explicitly shown whereas lossy joins are excluded. Relation hierarchies resemble the schema graphs in the entity-relationship model.\par An approach to the design of relation hierarchies is outlined in the context of data dependencies and relational decomposition. The approach consists of two steps; each is described as an algorithm. Algorithm DEC decomposes a given universal relation according to a given set of data dependencies and produces a set of nondecomposable relation schemes. This algorithm differs from its predecessors in that it produces no redundant relation schemes. 
Algorithm RH further structures the relation schemes produced by Algorithm DEC into a hierarchical schema. These algorithms can be useful software tools for database designers.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; database design; lossless join; multivalued dependency; relation normalization", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}", } @Article{Chamberlin:1981:SRT, author = "D. D. Chamberlin and M. M. Astrahan and W. F. King and R. A. Lorie and J. W. Mehl and T. G. Price and M. Schkolnick and P. Griffiths Selinger and D. R. Slutz and B. W. Wade and R. A. Yost", title = "Support for Repetitive Transactions and Ad Hoc Queries in {System R}", journal = j-TODS, volume = "6", number = "1", pages = "70--94", month = mar, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: IBM Research Report RJ2551(33151), May. 1979.", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-1/p70-chamberlin/p70-chamberlin.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-1/p70-chamberlin/", abstract = "System R supports a high-level relational user language called SQL which may be used by ad hoc users at terminals or as an embedded data sublanguage in PL/I or COBOL. 
Host-language programs with embedded SQL statements are processed by the System R precompiler which replaces the SQL statements by calls to a machine-language access module. The precompilation approach removes much of the work of parsing, name binding, and access path selection from the path of a running program, enabling highly efficient support for repetitive transactions. Ad hoc queries are processed by a similar approach of name binding and access path selection which takes place on-line when the query is specified. By providing a flexible spectrum of binding times, System R permits transaction-oriented programs and ad hoc query users to share a database without loss of efficiency.\par System R is an experimental database management system designed and built by members of the IBM San Jose Research Laboratory as part of a research program on the relational model of data. This paper describes the architecture of System R, and gives some preliminary measurements of system performance in both the ad hoc query and the ``canned program'' environments.", acknowledgement = ack-nhfb, annote = "Embedded SQL statements are processed by the System R precompiler enabling highly efficient support for repetitive transactions. Ad hoc query is specified. By providing a flexible spectrum of binding times. System R permits transaction-oriented programs and ad hoc query users to share a database without loss of efficiency.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "compilation; data base systems, TODS ad-hoc relation database IBM San Jose; performance measurements; query languages; relational database systems; transaction processing", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf System R}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Schlorer:1981:SSD, author = "Jan Schl{\"o}rer", title = "Security of Statistical Databases: Multidimensional Transformation", journal = j-TODS, volume = "6", number = "1", pages = "95--112", month = mar, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68B15", MRnumber = "82b:68018", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-1/p95-schlorer/p95-schlorer.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-1/p95-schlorer/", abstract = "The concept of multidimensional transformation of statistical databases is described. A given set of statistical output may be compatible with more than one statistical database. A transformed database $ D' $ is a database which (1) differs from the original database $D$ in its record content, but (2) produces, within certain limits, the same statistical output as the original database. For a transformable database $D$ there are two options: One may physically transform $D$ into a suitable database $ D' $, or one may release only that output which will not permit the users to decide whether it comes from $D$ or $ D' $. The second way is, of course, the easier one. Basic structural requirements for transformable statistical databases are investigated.
Advantages, drawbacks, and open questions are discussed.", acknowledgement = ack-nhfb, annote = "A transformed database differs from the original database in its record content but produces within certain limits the same statistical output.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "confidentiality; data base systems; data processing --- security of data; database; database security; matrices; security; statistical database", subject = "Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}", } @Article{Chin:1981:SDD, author = "Francis Y. Chin and Gultekin {\"O}zsoyo{\u{g}}lu", title = "Statistical Database Design", journal = j-TODS, volume = "6", number = "1", pages = "113--139", month = mar, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-1/p113-chin/p113-chin.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-1/p113-chin/", abstract = "The security problem of a statistical database is to limit the use of the database so that no sequence of statistical queries is sufficient to deduce confidential or private information. In this paper it is suggested that the problem be investigated at the conceptual data model level. The design of a statistical database should utilize a statistical security management facility to enforce the security constraints at the conceptual model level. Information revealed to users is well defined in the sense that it can at most be reduced to nondecomposable information involving a group of individuals. 
In addition, the design also takes into consideration means of storing the query information for auditing purposes, changes in the database, users' knowledge, and some security measures.", acknowledgement = ack-nhfb, annote = "Limit the use of the database so that no sequence of statistical queries is sufficient to deduce confidential information at the conceptual data model level.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "compromisability; conceptual databases model; data base systems; data processing --- security of data; database design; protection; security; statistical database", subject = "Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}", } @Article{Shipman:1981:FDM, author = "David W. Shipman", title = "The Functional Data Model and the Data Language {DAPLEX}", journal = j-TODS, volume = "6", number = "1", pages = "140--173", month = mar, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/bibdb.bib; Database/Graefe.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/Functional.bib; Misc/is.bib", note = "Reprinted in \cite{Stonebraker:1988:RDS}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-1/p140-shipman/p140-shipman.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-1/p140-shipman/", abstract = "DAPLEX is a database language which incorporates: \par a formulation of data in terms of entities;\par a functional representation for both actual and virtual data relationships;\par a rich collection of language constructs for expressing entity selection criteria;\par a notion of subtype/supertype relationships among entity types.\par 
This paper presents and motivates the DAPLEX language and the underlying data model on which it is based.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming languages; data base systems; database; functional data model; language", subject = "Information Systems --- Database Management --- Languages (H.2.3): {\bf DAPLEX}; Information Systems --- Database Management --- Languages (H.2.3)", } @Article{Rosenberg:1981:TSO, author = "Arnold L. Rosenberg and Lawrence Snyder", title = "Time- and Space-Optimality in {B-Trees}", journal = j-TODS, volume = "6", number = "1", pages = "174--193", month = mar, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68B15 (68E10)", MRnumber = "82m:68048", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-1/p174-rosenberg/p174-rosenberg.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-1/p174-rosenberg/", abstract = "A B-tree is {\em compact\/} if it is minimal in number of nodes, hence has optimal space utilization, among equally capacious B-trees of the same order. The space utilization of compact B-trees is analyzed and compared with that of noncompact B-trees and with (node)-visit-optimal B-trees, which minimize the expected number of nodes visited per key access. Compact B-trees can be as much as a {\em factor\/} of 2.5 more space efficient than visit-optimal B-trees; and the node-visit cost of a compact tree is never more than 1 + the node-visit cost of an optimal tree. 
The utility of initializing a B-tree to be compact (which initialization can be done in time linear in the number of keys if the keys are presorted) is demonstrated by comparing the space utilization of a compact tree that has been augmented by random insertions with that of a tree that has been grown entirely by random insertions. Even after increasing the number of keys by a modest amount, the effects of compact initialization are still felt. Once the tree has grown so large that these effects are no longer discernible, the tree can be expeditiously compacted in place using an algorithm presented here; and the benefits of compactness resume.", acknowledgement = ack-nhfb, annote = "A Btree is compact if it is minimal in number of nodes. Compact Btree initialization can be done in time linear in the number of keys if the keys are presorted. Study indicates that space-optimal trees are nearly time optimal, but time-optimal trees are nearly space pessimal.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "2,3-tree; B-tree; bushy B-tree; compact B-tree; data processing; node-visit cost; space utilization", subject = "Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Trees}", } @Article{Scholl:1981:NFO, author = "Michel Scholl", title = "New File Organizations Based on Dynamic Hashing", journal = j-TODS, volume = "6", number = "1", pages = "194--211", month = mar, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68B15", MRnumber = "82c:68016", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-1/p194-scholl/p194-scholl.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-1/p194-scholl/", abstract = "New file organizations based on hashing and suitable for data whose volume may vary rapidly recently appeared in the literature. In the three schemes which have been independently proposed, rehashing is avoided, storage space is dynamically adjusted to the number of records actually stored, and there are no overflow records. Two of these techniques employ an index to the data file. Retrieval is fast and storage utilization is low.\par In order to increase storage utilization, we introduce two schemes based on a similar idea and analyze the performance of the second scheme. Both techniques use an index of much smaller size. In both schemes, overflow records are accepted. 
The price which has to be paid for the improvement in storage utilization is a slight access cost degradation.", acknowledgement = ack-nhfb, annote = "In the three schemes which proposed, rehashing is avoided, storage space is dynamically adjusted to the number of records actually stored, and there are no overflow records. Two of these techniques employ an index to the data file.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing; data structure; dynamic hashing; file organization; hashing; linear splitting", subject = "Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}", } @Article{Kung:1981:OMC, author = "H. T. Kung and John T. Robinson", title = "On Optimistic Methods for Concurrency Control", journal = j-TODS, volume = "6", number = "2", pages = "213--226", month = jun, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Compiler/garbage.collection.bib; Compiler/Heaps.bib; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/misc.1.bib; Misc/real.time.bib; Object/Nierstrasz.bib", note = "Reprinted in \cite{Stonebraker:1988:RDS}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-2/p213-kung/p213-kung.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-2/p213-kung/", abstract = "Most current approaches to concurrency control in database systems rely on locking of data objects as a control mechanism. In this paper, two families of nonlocking concurrency controls are presented. 
The methods used are ``optimistic'' in the sense that they rely mainly on transaction backup as a control mechanism, ``hoping'' that conflicts between transactions will not occur. Applications for which these methods should be more efficient than locking are discussed.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency controls; data base systems, concurrency other; databases; transaction processing", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}", } @Article{Boral:1981:PAS, author = "Haran Boral and David J. DeWitt", title = "Processor Allocation Strategies for Multiprocessor Database Machines", journal = j-TODS, volume = "6", number = "2", pages = "227--254", month = jun, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-2/p227-boral/p227-boral.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-2/p227-boral/", abstract = "In this paper four alternative strategies for assigning processors to queries in multiprocessor database machines are described and evaluated. The results demonstrate that SIMD database machines are indeed a poor design when their performance is compared with that of the three MIMD strategies presented. \par Also introduced is the application of data-flow machine techniques to the processing of relational algebra queries. A strategy that employs data-flow techniques is shown to be superior to the other strategies described by several experiments. 
Furthermore, if the data-flow query processing strategy is employed, the results indicate that a two-level storage hierarchy (in which relations are paged between a shared data cache and mass storage) does not have a significant impact on performance.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "associative processors; back-end computers; computer architecture; data base systems, Direct TODS; data-flow computers; database machines; database management; parallel processors; processor scheduling", subject = "Information Systems --- Database Management --- Database Machines (H.2.6); Information Systems --- Database Management (H.2)", } @Article{Su:1981:TDT, author = "Stanley Y. W. Su and Herman Lam and Der Her Lo", title = "Transformation of Data Traversals and Operations in Application Programs to Account for Semantic Changes of Databases", journal = j-TODS, volume = "6", number = "2", pages = "255--294", month = jun, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-2/p255-su/p255-su.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-2/p255-su/", abstract = "This paper addresses the problem of application program conversion to account for changes in database semantics that result in changes in the schema and database contents. 
With the observation that the existing data models can be viewed as alternative ways of modeling the same database semantics, a methodology of application program analysis and conversion based on an existing-DBMS-model-and schema-independent representation of both the database and programs is presented. In this methodology, the source and target databases are described in terms of the association types of a semantic association model. The structural properties, the integrity constraints, and the operational characteristics (storage operation behaviors) of the association types are more explicitly defined to reveal the semantics that is generally hidden in application programs. The explicit descriptions of the source and target databases are used as the basis for program analysis and conversion. Application programs are described in terms of a small number of ``access patterns'' which define the data traversals and operations of the programs. In addition to the methodology, this paper (1) describes a model of a generalized application program conversion system that serves as a framework for research, (2) presents an analysis of access patterns that serve as the primitives for program description, (3) delineates some meaningful semantic changes to databases and their corresponding transformation rules for program conversion, (4) illustrates the application of these rules to two different approaches to program conversion problems, and (5) reports on the development effort undertaken at the University of Florida.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "access pattern; application program conversion; data base systems; database changes; semantic data model; transformation rules", subject = "Information Systems --- Database Management --- Database Applications (H.2.8); Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}", } @Article{Clemons:1981:DES, author = "Eric K. Clemons", title = "Design of an External Schema Facility to Define and Process Recursive Structures", journal = j-TODS, volume = "6", number = "2", pages = "295--311", month = jun, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-2/p295-clemons/p295-clemons.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-2/p295-clemons/", abstract = "The role of the external schema is to support user views of data and thus to provide programmers with easier data access. This author believes that an external schema facility is best based on hierarchies, both simple and recursive. After a brief introduction to an external schema facility to support simple hierarchical user views, the requirements for a facility for recursive hierarchies are listed and the necessary extensions to the external schema definition language are offered.\par Functions that must be provided for generality in definition are node specification and node control. Tree traversal functions must be provided for processing. 
Definitions of each and examples of use are presented.", acknowledgement = ack-nhfb, annote = "[Ahad,Yao,Choi87] A.2.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "ANSI SPARC architectures; data base systems; external schemata; recursive data structures; user views", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}", } @Article{Davida:1981:DES, author = "George I. Davida and David L. Wells and John B. Kam", title = "A Database Encryption System with Subkeys", journal = j-TODS, volume = "6", number = "2", pages = "312--328", month = jun, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68B15", MRnumber = "82f:68020", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-2/p312-davida/p312-davida.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-2/p312-davida/", abstract = "A new cryptosystem that is suitable for database encryption is presented. The system has the important property of having subkeys that allow the encryption and decryption of fields within a record. The system is based on the Chinese Remainder Theorem.", acknowledgement = ack-nhfb, annote = "Subkeys allow the encryption and decryption of fields within a record. The system is based on the Chinese Remainder Theorem.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "codes, symbolic; data base systems; data security; databases; decryption; encryption; subkeys", subject = "Data --- Data Encryption (E.3)", } @Article{Ling:1981:ITN, author = "Tok Wang Ling and Frank W. Tompa and Tiko Kameda", title = "An Improved Third Normal Form for Relational Databases", journal = j-TODS, volume = "6", number = "2", pages = "329--346", month = jun, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68B15", MRnumber = "82f:68024", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-2/p329-ling/p329-ling.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-2/p329-ling/", abstract = "In this paper, we show that some Codd third normal form relations may contain ``superfluous'' attributes because the definitions of transitive dependency and prime attribute are inadequate when applied to sets of relations. To correct this, an improved third normal form is defined and an algorithm is given to construct a set of relations from a given set of functional dependencies in such a way that the superfluous attributes are guaranteed to be removed. This new normal form is compared with other existing definitions of third normal form, and the deletion normalization method proposed is shown to subsume the decomposition method of normalization.", acknowledgement = ack-nhfb, annote = "An improved third normal form is defined and an algorithm is given to construct a set of relations from a given set of functional dependencies in such a way that the superfluous attributes are guaranteed to be removed.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "covering; data base systems; database design; functional dependency; normalization; prime attribute; reconstructibility; relational schema; third normal form; transitive dependency", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}", } @Article{McLean:1981:CSC, author = "Gordon {McLean, Jr.}", title = "Comments on {SDD-1} Concurrency Control Mechanisms", journal = j-TODS, volume = "6", number = "2", pages = "347--350", month = jun, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-2/p347-mclean/p347-mclean.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-2/p347-mclean/", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}", } @Article{Hammer:1981:DDS, author = "Michael Hammer and Dennis Mc Leod", title = "Database Description with {SDM}: a Semantic Database Model", journal = j-TODS, volume = "6", number = "3", pages = "351--386", month = sep, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/bibdb.bib; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", note = "Reprinted in \cite{Stonebraker:1988:RDS}. Also published in \cite{Zdonik:1990:ROO}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-3/p351-hammer/p351-hammer.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-3/p351-hammer/", abstract = "SDM is a high-level semantics-based database description and structuring formalism (database model) for databases. This database model is designed to capture more of the meaning of an application environment than is possible with contemporary database models. An SDM specification describes a database in terms of the kinds of entities that exist in the application environment, the classifications and groupings of those entities, and the structural interconnections among them. SDM provides a collection of high-level modeling primitives to capture the semantics of an application environment. By accommodating derived information in a database structural specification, SDM allows the same information to be viewed in several ways; this makes it possible to directly accommodate the variety of needs and processing requirements typically present in database applications. 
The design of the present SDM is based on our experience in using a preliminary version of it.\par SDM is designed to enhance the effectiveness and usability of database systems. An SDM database description can serve as a formal specification and documentation tool for a database; it can provide a basis for supporting a variety of powerful user interface facilities, it can serve as a conceptual database model in the database design process; and, it can be used as the database model for a new kind of database management system.", acknowledgement = ack-nhfb, annote = "SDM is a high-level semantics-based database model, to capture the meaning of an application environment. One of the papers usually referred to when discussing semantic data models. Describes a model which permits a lot of flexibility and expressiveness, and is consequently difficult to implement. Advantage is that it can be used as a specification and documentation tool. Good introduction, giving an overview of (some?, most?, all?) problems in semantic data models. The section describing SDM DDL is a bit too detailed (one needs to pick up the essential concepts like subclassing, and redundancy in model (which may be necessary to make the model easier to use)). Some discussion of inheritance is also present. Nothing much is said in the final discussion. Reasonable paper. To benefit, one needs to be careful not to get lost in the details. A detailed description of the semantic data model.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; database definition; database management; database modeling; database models; database semantics; logical database design", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}", } @Article{Fagin:1981:NFR, author = "Ronald Fagin", title = "A Normal Form for Relational Databases That is Based on Domains and Keys", journal = j-TODS, volume = "6", number = "3", pages = "387--415", month = sep, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Compiler/prog.lang.theory.bib; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-3/p387-fagin/p387-fagin.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-3/p387-fagin/", abstract = "The new normal form for relational databases, called domain-key normal form (DK\slash NF), is defined. Also, formal definitions of insertion anomaly and deletion anomaly are presented. It is shown that a schema is in DK\slash NF if and only if it has no insertion or deletion anomalies. Unlike previously defined normal forms, DK\slash NF is not defined in terms of traditional dependencies (functional, multivalued, or join). Instead, it is defined in terms of the more primitive concepts of domain and key, along with the general concept of a ``constraint''. It is considered how the definitions of traditional normal forms might be modified by taking into consideration, for the first time, the combinatorial consequences of bounded domain sizes. It is shown that after this modification, these traditional normal forms are all implied by DK\slash NF. 
In particular, if all domains are infinite, then these traditional normal forms are all implied by DK\slash NF.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "anomaly; complexity; data base systems; database design; DK/NF; domain-key normal form; functional dependency; join dependency; multivalued dependency; normalization; relational database", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Normal forms}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Hong:1981:AHS, author = "Y. C. Hong and Stanley Y. W. Su", title = "Associative Hardware and Software Techniques for Integrity Control", journal = j-TODS, volume = "6", number = "3", pages = "416--440", month = sep, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-3/p416-hong/p416-hong.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-3/p416-hong/", abstract = "This paper presents the integrity control mechanism of the associative processing system, CASSM. The mechanism takes advantage of the associative techniques, such as content and context addressing, tagging and marking data, parallel processing, automatic triggering of integrity control procedures, etc., for integrity control and as a result offers three significant advantages: (1) The problem of staging data in a main memory for integrity checking can be eliminated because database storage operations are verified at the place where the data are stored. 
(2) The backout or merging procedures are relatively easy and inexpensive in the associative system because modified copies can be substituted for the originals or may be discarded by merely changing their associated tags. (3) The database management system software is simplified because database integrity functions are handled by the associative processing system to which a mainframe computer is a front-end computer.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "assertion and trigger; associative techniques; cellular-logic devices; data base systems; database integrity; database management; integrity control; integrity control, SYWSu hardware support relational database machine TODS", subject = "Information Systems --- Database Management --- General (H.2.0): {\bf Security, integrity, and protection**}", } @Article{March:1981:FMS, author = "Salvatore T. March and Dennis G. Severance and Michael Wilens", title = "Frame Memory: a Storage Architecture to Support Rapid Design and Implementation of Efficient Databases", journal = j-TODS, volume = "6", number = "3", pages = "441--463", month = sep, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-3/p441-march/p441-march.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-3/p441-march/", abstract = "Frame memory is a virtual view of secondary storage that can be implemented with reasonable overhead to support database record storage and accessing requirements. 
Frame memory is designed so that its operating characteristics can be easily manipulated by either designers or design algorithms, while performance effects of such changes can be accurately predicted. Automated design procedures exist to generate and evaluate alternative database designs built upon frame memory, and the existence of these procedures establishes frames as an attractive memory management architecture for future database management systems.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "analytic modeling; data base systems; database design system; database machine; hardware support; TODS; virtual secondary storage", subject = "Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2); Software --- Operating Systems --- Storage Management (D.4.2): {\bf Secondary storage}", } @Article{vandeRiet:1981:HLP, author = "Reind P. {van de Riet} and Anthony I. Wasserman and Martin L. Kersten and Wiebren {de Jonge}", title = "High-Level Programming Features for Improving the Efficiency of a Relational Database System", journal = j-TODS, volume = "6", number = "3", pages = "464--485", month = sep, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: UCSF, Lab. of Med. Inf. Science, Tech. Rpt. 44, Feb. 
1980.", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-3/p464-van_de_riet/p464-van_de_riet.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-3/p464-van_de_riet/", abstract = "This paper discusses some high-level language programming constructs that can be used to manipulate the relations of a relational database system efficiently. Three different constructs are described: (1) tuple identifiers that directly reference tuples of a relation; (2) cursors that may iterate over the tuples of a relation; and (3) markings, a form of temporary relation consisting of a set of tuple identifiers. In each case, attention is given to syntactic, semantic, and implementation considerations. \par The use of these features is first presented within the context of the programming language PLAIN, and it is then shown how these features could be used more generally to provide database manipulation capabilities in a high-level programming language. Consideration is also given to issues of programming methodology, with an important goal being the achievement of a balance between the enforcement of good programming practices and the ability to write efficient programs.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "markings; PLAIN; programming languages; programming methodology; relational algebra; relational database management", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Database (persistent) programming languages}", } @Article{Culik:1981:DMT, author = "K. {Culik II} and Th. Ottmann and D. 
Wood", title = "Dense multiway trees", journal = j-TODS, volume = "6", number = "3", pages = "486--512", month = sep, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68B15 (05C05)", MRnumber = "82m:68038", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1981-6-3/p486-culic/", abstract = "B-trees of order $m$ are a ``balanced'' class of $m$-ary trees, which have applications in the areas of file organization. In fact, they have been the only choice when balanced multiway trees are required. Although they have very simple insertion and deletion algorithms, their storage utilization, that is, the number of keys per page or node, is at worst 50 percent. In the present paper we investigate a new class of balanced $m$-ary trees, the dense multiway trees, and compare their storage utilization with that of B-trees of order $m$. \par Surprisingly, we are able to demonstrate that weakly dense multiway trees have an $ (l o g_2 N) $ insertion algorithm. We also show that inserting $ m h - 1 $ keys in ascending order into an initially empty dense multiway tree yields the complete $m$-ary tree of height $h$, and that at intermediate steps in the insertion sequence the intermediate trees can also be considered to be as dense as possible. Furthermore, an analysis of the limiting dynamic behavior of the dense $m$-ary trees under insertion shows that the average storage utilization tends to 1; that is, the trees become as dense as possible. This motivates the use of the term ``dense.''", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "B-trees; balanced trees; dense trees; multiway trees; search trees; storage utilization", subject = "Data --- Data Structures (E.1): {\bf Trees}", } @Article{Comer:1981:AHF, author = "Douglas Comer", title = "Analysis of a Heuristic for Full Trie Minimization", journal = j-TODS, volume = "6", number = "3", pages = "513--537", month = sep, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-3/p513-comer/p513-comer.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-3/p513-comer/", abstract = "A trie is a distributed-key search tree in which records from a file correspond to leaves in the tree. Retrieval consists of following a path from one root to a leaf, where the choice of edge at each node is determined by attribute values of the key. For full tries, those in which all leaves lie at the same depth, the problem of finding an ordering of attributes which yields a minimum size trie is NP-complete.\par This paper considers a ``greedy'' heuristic for constructing low-cost tries. It presents simulation experiments which show that the greedy method tends to produce tries with small size, and analysis leading to a worst case bound on approximations produced by the heuristic. It also shows a class of files for which the greedy method may perform badly, producing tries of high cost.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing; heuristic; trie index; trie size", subject = "Computing Methodologies --- Artificial Intelligence --- Problem Solving, Control Methods, and Search (I.2.8): {\bf Heuristic methods}", } @Article{Kent:1981:CAU, author = "W. Kent", title = "Consequences of Assuming a Universal Relation", journal = j-TODS, volume = "6", number = "4", pages = "539--556", month = dec, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/database.bib; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See remark \cite{Ullman:1983:KCA}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-4/p539-kent/p539-kent.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-4/p539-kent/", abstract = "Although central to the current direction of dependency theory, the assumption of a universal relation is incompatible with some aspects of relational database theory and practice. Furthermore, the universal relation is itself ill defined in some important ways. And, under the universal relation assumption, the decomposition approach to database design becomes virtually indistinguishable from the synthetic approach.", acknowledgement = ack-nhfb, annote = "The assumption of a universal relation is incompatible with some aspects of relational database theory and practice. Under the universal relation assumption, the decomposition approach to database design becomes virtually indistinguishable from the synthetic approach.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; database design; dependency theory; rational database; relational theory; universal relation", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Models and Principles --- Systems and Information Theory (H.1.1); Information Systems --- Database Management --- Logical Design (H.2.1)", } @Article{Bancilhon:1981:USR, author = "F. B. Bancilhon and N. Spyratos", title = "Update Semantics of Relational Views", journal = j-TODS, volume = "6", number = "4", pages = "557--575", month = dec, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Ai/nonmono.bib; Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See comment \cite{Keller:1987:CBS}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-4/p557-bancilhon/p557-bancilhon.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-4/p557-bancilhon/", abstract = "A database view is a portion of the data structured in a way suitable to a specific application. Updates on views must be translated into updates on the underlying database. 
This paper studies the translation process in the relational model.\par The procedure is as follows: first, a ``complete'' set of updates is defined such that\par together with every update the set contains a ``return'' update, that is, one that brings the view back to the original state;\par given two updates in the set, their composition is also in the set.\par To translate a complete set, we define a mapping called a ``translator,'' that associates with each view update a unique database update called a ``translation.'' The constraint on a translation is to take the database to a state mapping onto the updated view. The constraint on the translator is to be a morphism.\par We propose a method for defining translators. Together with the user-defined view, we define a ``complementary'' view such that the database could be computed from the view and its complement. We show that a view can have many different complements and that the choice of a complement determines an update policy. Thus, we fix a view complement and we define the translation of a given view update in such a way that the complement remains invariant (``translation under constant complement''). The main result of the paper states that, given a complete set $U$ of view updates, $U$ has a translator if and only if $U$ is translatable under constant complement.", acknowledgement = ack-nhfb, annote = "A mapping called a ``translator'', associates with each view update a unique database update. A method for defining translators with the user-defined view, define a ``complementary'' view such that the database could be computed from the view and its complement. We define the translation of a given view update in such a way that the complement remains invariant. Aplies to Universal relations.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "conceptual model; data base systems; data model; data semantics; database view; relation; relational model database; update translation; view updating", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Theory of Computation --- Logics and Meanings of Programs --- Semantics of Programming Languages (F.3.2)", } @Article{Baroody:1981:OOA, author = "A. James {Baroody, Jr.} and David J. DeWitt", title = "An Object-Oriented Approach to Database System Implementation", journal = j-TODS, volume = "6", number = "4", pages = "576--601", month = dec, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Object/Nierstrasz.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-4/p576-baroody/p576-baroody.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-4/p576-baroody/", abstract = "This paper examines object-oriented programming as an implementation technique for database systems. The object-oriented approach encapsulates the representations of database entities and relationships with the procedures that manipulate them. To achieve this, we first define abstractions of the modeling constructs of the data model that describe their common properties and behavior. Then we represent the entity types and relationship types in the conceptual schema and the internal schema by objects that are instances of these abstractions. 
The generic procedures (data manipulation routines) that comprise the user interface can now be implemented as calls to the procedures associated with these objects.\par A generic procedure model of database implementation techniques is presented and discussed. Several current database system implementation techniques are illustrated as examples of this model, followed by a critical analysis of our implementation technique based on the use of objects. We demonstrate that the object-oriented approach has advantages of data independence, run-time efficiency due to eliminating access to system descriptors, and support for low-level views.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming, olit-db casais; data base systems; data independence; data manipulation routines; database systems; high-level languages; object-oriented programming; procedural binding", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Object-oriented databases}; Computer Systems Organization --- Computer System Implementation (C.5); Information Systems --- Database Management --- Languages (H.2.3)", } @Article{Bernstein:1981:QPS, author = "Philip A. Bernstein and Nathan Goodman and Eugene Wong and Christopher L. Reeve and James B. 
{Rothnie, Jr.}", title = "Query Processing in a System for Distributed Databases ({SDD-1})", journal = j-TODS, volume = "6", number = "4", pages = "602--625", month = dec, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-4/p602-bernstein/p602-bernstein.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-4/p602-bernstein/", abstract = "This paper describes the techniques used to optimize relational queries in the SDD-1 distributed database system. Queries are submitted to SDD-1 in a high-level procedural language called Datalanguage. Optimization begins by translating each Datalanguage query into a relational calculus form called an {\em envelope}, which is essentially an aggregate-free QUEL query. This paper is primarily concerned with the optimization of envelopes.\par Envelopes are processed in two phases. The first phase executes relational operations at various sites of the distributed database in order to delimit a subset of the database that contains all data relevant to the envelope. This subset is called a {\em reduction\/} of the database. The second phase transmits the reduction to one designated site, and the query is executed locally at that site.\par The critical optimization problem is to perform the reduction phase efficiently. Success depends on designing a good repertoire of operators to use during this phase, and an effective algorithm for deciding which of these operators to use in processing a given envelope against a given database. The principal reduction operator that we employ is called a {\em semijoin}. 
In this paper we define the semijoin operator, explain why semijoin is an effective reduction operator, and present an algorithm that constructs a cost-effective program of semijoins, given an envelope and a database.", acknowledgement = ack-nhfb, annote = "Techniques to optimize relational queries in the SDD-1 distributed database system. First phase executes relational operations at various sites to delimit a subset called a reduction. The second phase transmits the reduction to one designated site. The principal reduction operator, introduced here, is called a semijoin.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- subroutines; data base systems; distributed databases; query optimization; query processing; query processing, TODS semijoins semi-join join; relational databases; semijoins", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Welty:1981:HFC, author = "Charles Welty and David W. 
Stemple", title = "Human Factors Comparison of a Procedural and a Nonprocedural Query Language", journal = j-TODS, volume = "6", number = "4", pages = "626--649", month = dec, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-4/p626-welty/p626-welty.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-4/p626-welty/", abstract = "Two experiments testing the ability of subjects to write queries in two different query languages were run. The two languages, SQL and TABLET, differ primarily in their procedurality; both languages use the relational data model, and their Halstead levels are similar. Constructs in the languages which do not affect their procedurality are identical. The two languages were learned by the experimental subjects almost exclusively from manuals presenting the same examples and problems ordered identically for both languages. The results of the experiments show that subjects using the more procedural language wrote difficult queries better than subjects using the less procedural language. The results of the experiments are also used to compare corresponding constructs in the two languages and to recommend improvements for these constructs.", acknowledgement = ack-nhfb, annote = "SQL and TABLET. The results show that subjects using the more procedural language wrote difficult queries better than subjects using the less procedural language.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; database systems; human factors; procedural and nonprocedural languages; query languages", subject = "Information Systems --- Models and Principles --- User/Machine Systems (H.1.2): {\bf Human factors}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}; Information Systems --- Database Management --- Systems (H.2.4)", } @Article{Lehman:1981:ELC, author = "Philip L. Lehman and S. Bing Yao", title = "Efficient Locking for Concurrent Operations on {B-Trees}", journal = j-TODS, volume = "6", number = "4", pages = "650--670", month = dec, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-4/p650-lehman/p650-lehman.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-4/p650-lehman/", abstract = "The B-tree and its variants have been found to be highly useful (both theoretically and in practice) for storing large amounts of information, especially on secondary storage devices. We examine the problem of overcoming the inherent difficulty of concurrent operations on such structures, using a practical storage model. A single additional ``link'' pointer in each node allows a process to easily recover from tree modifications performed by other concurrent processes. Our solution compares favorably with earlier solutions in that the locking scheme is simpler (no read-locks are used) and only a (small) constant number of nodes are locked by any update process at any given time. 
An informal correctness proof for our system is given.", acknowledgement = ack-nhfb, annote = "A single additional `link' pointer in each node allows a process to easily recover from tree modifications performed by other concurrent processes. No read-locks are used only a (small) constant number of nodes are locked by any update process at any given time.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "B-tree; concurrenct algorithms; concurrency controls; consistency; correctness; data processing; data structures; database; index organizations; locking protocols; multiway search trees", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Trees}", } @Article{Larson:1981:AIS, author = "Per-{\AA}ke Larson", title = "Analysis of Index-Sequential Files with Overflow Chaining", journal = j-TODS, volume = "6", number = "4", pages = "671--680", month = dec, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68B15 (68H05)", MRnumber = "82m:68044", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1981-6-4/p671-larson/p671-larson.pdf; http://www.acm.org/pubs/citations/journals/tods/1981-6-4/p671-larson/", abstract = "The gradual performance deterioration caused by deletions from and insertions into an index-sequential file after loading is analyzed. The model developed assumes that overflow records are handled by chaining. 
Formulas for computing the expected number of overflow records and the expected number of additional accesses caused by the overflow records for both successful and unsuccessful searches are derived.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "analysis of algorithms; analytic model; data processing, TODS ISAM; file organization; file structure; index sequential files; indexed sequential access method; ISAM; overflow; overflow chaining; overflow handling; performance analysis", subject = "Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}", } @Article{Comer:1981:EKD, author = "D. Comer", title = "Extended {K-d} Tree Database Organization: a Dynamic Multiattribute File Corresponds to Leaves in the Tree", journal = j-TODS, volume = "6", number = "3", pages = "??--??", month = sep, year = "1981", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 10 12:49:00 1996", bibsource = "Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", annote = "This paper considers a `greedy' heuristic for constructing low-cost trees.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", xxnote = "This paper does not seem to be published in TODS.", } @Article{Zaniolo:1982:DRN, author = "C. 
Zaniolo", title = "Database Relations with Null Values", journal = j-TODS, volume = "1", number = "1", pages = "??--??", month = mar, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 10 12:48:57 1996", bibsource = "Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", annote = "a three-valued logic: TRUE, FALSE, UNKNOWN", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", xxnote = "This paper does not seem to be published in TODS.", } @Article{Katz:1982:DCD, author = "R. H. Katz and E. Wong", title = "Decompiling {CODASYL DML} into Relational Queries", journal = j-TODS, volume = "7", number = "1", pages = "1--23", month = mar, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-1/p1-katz/p1-katz.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-1/p1-katz/", abstract = "A ``decompilation'' algorithm is developed to transform a program written with the procedural operations of CODASYL DML into one which interacts with a relational system via a nonprocedural query specification. An Access Path Model is introduced to interpret the semantic accesses performed by the program. Data flow analysis is used to determine how FIND operations implement semantic accesses. A sequence of these is mapped into a relational query and embedded into the original program. The class of programs for which the algorithm succeeds is characterized.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems; decompilation; semantic data models", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Data manipulation languages (DML)}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}; Information Systems --- Database Management --- Heterogeneous Databases (H.2.5): {\bf Program translation**}", } @Article{Zaniolo:1982:FAD, author = "Carlo Zaniolo and Michel A. Melkanoff", title = "A Formal Approach to the Definition and the Design of Conceptual Schemata for Database Systems", journal = j-TODS, volume = "7", number = "1", pages = "24--59", month = mar, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-1/p24-zaniolo/p24-zaniolo.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-1/p24-zaniolo/", abstract = "A formal approach is proposed to the definition and the design of conceptual database diagrams to be used as conceptual schemata in a system featuring a multilevel schema architecture, and as an aid for the design of other forms of schemata. We consider E-R (entity-relationship) diagrams, and we introduce a new representation called {\em CAZ\/}-graphs. A rigorous connection is established between these diagrams and some formal constraints used to describe relationships in the framework of the relational data model. These include functional and multivalued dependencies of database relations. 
The basis for our schemata is a combined representation for two fundamental structures underlying every relation: the first defined by its minimal atomic decompositions, the second by its elementary functional dependencies.\par The interaction between these two structures is explored, and we show that, jointly, they can represent a wide spectrum of database relationships, of which the well-known one-to-one, one-to-many, and many-to-many associations constitute only a small subset. It is suggested that a main objective in conceptual schema design is to ensure a complete representation of these two structures. A procedure is presented to design schemata which obtain this objective while eliminating redundancy. A simple correspondence between the topological properties of these schemata and the structure of multivalued dependencies of the original relation is established. Various applications are discussed and a number of illustrative examples are given.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems, logical design TODS", subject = "Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}", } @Article{Batory:1982:OFD, author = "D. S. 
Batory", title = "Optimal File Designs and Reorganization Points", journal = j-TODS, volume = "7", number = "1", pages = "60--81", month = mar, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: University of Toronto, TR-CSRG-110, 1980.", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-1/p60-batory/p60-batory.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-1/p60-batory/", abstract = "A model for studying the combined problems of file design and file reorganization is presented. New modeling techniques for predicting the performance evolution of files and for finding optimal reorganization points for files are introduced. Applications of the model to hash-based and indexed-sequential files reveal important relationships between initial loading factors and reorganization frequency. A practical file design strategy, based on these relationships, is proposed.", acknowledgement = ack-nhfb, annote = "Applications of the model to hash-based and indexed-sequential files reveal important relationships between initial loading factors and reorganization frequency.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing; file design; file reorganization", subject = "Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management --- Physical Design (H.2.2)", } @Article{Du:1982:DAC, author = "H. C. Du and J. S. 
Sobolewski", title = "Disk Allocation for {Cartesian} Product Files on Multiple-Disk Systems", journal = j-TODS, volume = "7", number = "1", pages = "82--101", month = mar, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-1/p82-du/p82-du.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-1/p82-du/", abstract = "Cartesian product files have recently been shown to exhibit attractive properties for partial match queries. This paper considers the file allocation problem for Cartesian product files, which can be stated as follows: Given a $k$-attribute Cartesian product file and an $m$-disk system, allocate buckets among the $m$ disks in such a way that, for all possible partial match queries, the concurrency of disk accesses is maximized. The Disk Modulo (DM) allocation method is described first, and it is shown to be strict optimal under many conditions commonly occurring in practice, including all possible partial match queries when the number of disks is 2 or 3. It is also shown that although it has good performance, the DM allocation method is not strict optimal for all possible partial match queries when the number of disks is greater than 3. The General Disk Modulo (GDM) allocation method is then described, and a sufficient but not necessary condition for strict optimality of the GDM method for all partial match queries and any number of disks is then derived. 
Simulation studies comparing the DM and random allocation methods in terms of the average number of disk accesses, in response to various classes of partial match queries, show the former to be significantly more effective even when the number of disks is greater than 3, that is, even in cases where the DM method is not strict optimal. The results that have been derived formally and shown by simulation can be used for more effective design of optimal file systems for partial match queries. When considering multiple-disk systems with independent access paths, it is important to ensure that similar records are clustered into the same or similar buckets, while similar buckets should be dispersed uniformly among the disks.", acknowledgement = ack-nhfb, annote = "For partial match queries. Allocate buckets among the m disks in such a way that, for all possible partial match queries, the concurrency of disk accesses is maximized.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Cartesian product files; data processing", subject = "Information Systems --- Database Management --- Physical Design (H.2.2); Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Search process}; Information Systems --- Database Management --- Systems (H.2.4)", } @Article{Dahl:1982:DSD, author = "Ver{\'o}nica Dahl", title = "On Database Systems Development through Logic", journal = j-TODS, volume = "7", number = "1", pages = "102--123", month = mar, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68H05 (03B99)", MRnumber = "83f:68112", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Ai/nonmono.bib; Compendex 
database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-1/p102-dahl/p102-dahl.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-1/p102-dahl/", abstract = "The use of logic as a single tool for formalizing and implementing different aspects of database systems in a uniform manner is discussed. The discussion focuses on relational databases with deductive capabilities and very high-level querying and defining features. The computational interpretation of logic is briefly reviewed, and then several pros and cons concerning the description of data, programs, queries, and language parser in terms of logic programs are examined. The inadequacies are discussed, and it is shown that they can be overcome by the introduction of convenient extensions into logic programming. Finally, an experimental database query system with a natural language front end, implemented in PROLOG, is presented as an illustration of these concepts. A description of the latter from the user's point of view and a sample consultation session in Spanish are included.", acknowledgement = ack-nhfb, annote = "The use of logic as a single tool for relational databases with deductive capabilities and very high-level querying and defining features. Inadequacies are discussed, and overcome by extensions into logic programming. An experimental database query system with a natural language front end, implemented in PROLOG, is presented.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems, TODS relational database; rational database", subject = "Information Systems --- Database Management --- Systems (H.2.4); Theory of Computation --- Mathematical Logic and Formal Languages --- Mathematical Logic (F.4.1): {\bf Logic and constraint programming}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Prolog}", } @Article{Addis:1982:RBL, author = "T. R. Addis", title = "A Relation-Based Language Interpreter for a Content Addressable File Store", journal = j-TODS, volume = "7", number = "2", pages = "125--163", month = jun, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-2/p125-addis/p125-addis.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-2/p125-addis/", abstract = "The combination of the Content Addressable File Store (CAFS \footnote{CAFS is a registered trademark of International Computers Limited}) and an extension of relational analysis is described. This combination allows a simple and compact implementation of a database query and update language (FIDL). The language has one of the important properties of a ``natural'' language interface by using a ``world model'' derived from the relational analysis. 
The interpreter (FLIN) takes full advantage of the CAFS by employing a unique database storage technique which results in a fast response to both queries and updates.", acknowledgement = ack-nhfb, annote = "ICL CAFS is used.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer operating systems --- program Interpreters, hardware support database machine CAFS TODS; content addressing; data base systems", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Database Management --- Database Machines (H.2.6)", } @Article{Buneman:1982:ITD, author = "Peter Buneman and Robert E. Frankel and Rishiyur Nikhil", title = "An Implementation Technique for Database Query Languages", journal = j-TODS, volume = "7", number = "2", pages = "164--186", month = jun, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/Functional.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-2/p164-buneman/p164-buneman.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-2/p164-buneman/", abstract = "Structured query languages, such as those available for relational databases, are becoming increasingly desirable for all database management systems. Such languages are applicative: there is no need for an assignment or update statement. A new technique is described that allows for the implementation of applicative query languages against most commonly used database systems. 
The technique involves ``lazy'' evaluation and has a number of advantages over existing methods: it allows queries and functions of arbitrary complexity to be constructed; it reduces the use of secondary storage; it provides a simple control structure through which interfaces to other programs may be constructed; and the implementation, including the database interface, is quite compact. Although the technique is presented for a specific functional programming system and for a CODASYL DBMS, it is general and may be used for other query languages and database systems.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "applicative programming; coroutines; database interfaces; functional, data base systems; lazy evaluation; query languages; TODS functional FQL applicative programming lazy evaluation", subject = "Information Systems --- Database Management --- Systems (H.2.4); Software --- Programming Languages --- Language Classifications (D.3.2): {\bf Applicative (functional) languages}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}", } @Article{Obermarck:1982:DDD, author = "Ron Obermarck", title = "Distributed Deadlock Detection Algorithm", journal = j-TODS, volume = "7", number = "2", pages = "187--208", month = jun, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/misc.1.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-2/p187-obermarck/p187-obermarck.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-2/p187-obermarck/", abstract = "We propose an algorithm for detecting deadlocks 
among transactions running concurrently in a distributed processing network (i.e., a distributed database system). The proposed algorithm is a distributed deadlock detection algorithm. A proof of the correctness of the distributed portion of the algorithm is given, followed by an example of the algorithm in operation. The performance characteristics of the algorithm are also presented.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming; data base systems; deadlock detection", subject = "Information Systems --- Database Management --- Systems (H.2.4); Computer Systems Organization --- Computer-Communication Networks --- Distributed Systems (C.2.4): {\bf Distributed databases}; Software --- Operating Systems --- Process Management (D.4.1): {\bf Deadlocks}; Software --- Operating Systems --- Organization and Design (D.4.7): {\bf Distributed systems}", } @Article{Garcia-Molina:1982:ROT, author = "H{\'e}ctor Garc{\'\i}a-Molina and Gio Wiederhold", title = "Read-Only Transactions in a Distributed Database", journal = j-TODS, volume = "7", number = "2", pages = "209--234", month = jun, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-2/p209-garcia-molina/p209-garcia-molina.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-2/p209-garcia-molina/", abstract = "A read-only transaction or query is a transaction which does not modify any data. 
Read-only transactions could be processed with general transaction processing algorithms, but in many cases it is more efficient to process read-only transactions with special algorithms which take advantage of the knowledge that the transaction only reads. This paper defines the various consistency and currency requirements that read-only transactions may have. The processing of the different classes of read-only transactions in a distributed database is discussed. The concept of {$R$} insularity is introduced to characterize both the read-only and update algorithms. Several simple update and read-only transaction processing algorithms are presented to illustrate how the query requirements and the update algorithms affect the read-only transaction processing algorithms.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; consistency; currency; data base systems, TODS R insularity; query; R insularity; read-only transaction; schedule; serializability; transaction; transaction processing algorithm", subject = "Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Computer Systems Organization --- Computer-Communication Networks --- Distributed Systems (C.2.4): {\bf Distributed databases}", } @Article{Shneiderman:1982:AAR, author = "Ben Shneiderman and Glenn Thomas", title = "An Architecture for Automatic Relational Database System Conversion", journal = j-TODS, volume = "7", number = "2", pages = "235--257", month = jun, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/database.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; 
https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-2/p235-shneiderman/p235-shneiderman.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-2/p235-shneiderman/", abstract = "Changes in requirements for database systems necessitate schema restructuring, database translation, and application or query program conversion. An alternative to the lengthy manual revision process is proposed by offering a set of 15 transformations keyed to the relational model of data and the relational algebra. Motivations, examples, and detailed descriptions are provided.", acknowledgement = ack-nhfb, annote = "Alterations to the logical structure of a DB may necessitate changes at three levels: (1) stored database, (2) schema definition, and (3) application programs or queries. Each transformation is assessed on three features: (1) information preservation (data are not destroyed, only their logical format is altered); (2) data dependence (a data dependent transformation is one in which the stored DB must be checked to determine whether it is consistent with the logical format of the target system); and (3) program dependence (a program dependent transformation is one in which the application programs must be checked to determine whether the transformation is permissible). At every stage the DB is kept in fourth normal form. The 15 transformations are divided into five groups. The first group includes simple alterations, such as changing the name of an attribute or relation (CHANGE NAME), or adding or deleting attributes or relations (ADD/DELETE ATTRIBUTES, INTRODUCE/SEPARATE). The role played by keys in the relational model is clearly critical, and particular care must be taken when transformations involving these keys are being carried out. The second group of transformations concerns the effect of adding attributes to or deleting attributes from keys (PROMOTE/DEMOTE). 
The third and fourth sets of transformations are provided for the combining and dividing of relations. (COMPOSE/DECOMPOSE, PARTITION/MERGE). The final group of transformations is concerned with functional dependencies", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "automatic conversion; data base systems; database systems; relational model; transformations", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Data manipulation languages (DML)}; Information Systems --- Database Management --- Heterogeneous Databases (H.2.5): {\bf Program translation**}", } @Article{Roussopoulos:1982:VIR, author = "Nicholas Roussopoulos", title = "View Indexing in Relational Databases", journal = j-TODS, volume = "7", number = "2", pages = "258--290", month = jun, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-2/p258-roussopoulos/p258-roussopoulos.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-2/p258-roussopoulos/", abstract = "The design and maintenance of a useful database system require efficient optimization of the logical access paths which demonstrate repetitive usage patterns. Views (classes of queries given by a query model) are an appropriate intermediate logical representation for databases. Frequently accessed views of databases need to be supported by indexing to enhance retrieval. 
This paper investigates the problem of selecting an optimal index set of views and describes an efficient algorithm for this selection.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data base systems, views precomputation index selection TODS index selection; index selection", subject = "Information Systems --- Database Management --- Physical Design (H.2.2); Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Jacobs:1982:IRL, author = "Barry E. Jacobs and Alan R. Aronson and Anthony C. Klug", title = "On Interpretations of Relational Languages and Solutions to the Implied Constraint Problem", journal = j-TODS, volume = "7", number = "2", pages = "291--315", month = jun, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-2/p291-jacobs/p291-jacobs.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-2/p291-jacobs/", abstract = "The interconnection between conceptual and external levels of a relational database is made precise in terms of the notion of ``interpretation'' between first-order languages. This is then used to obtain a methodology for discovering constraints at the external level that are ``implied'' by constraints at the conceptual level and by conceptual-to-external mappings. It is also seen that these concepts are important in other database issues, namely, automatic program conversion, database design, and compile-time error checking of embedded database languages. 
Although this study deals exclusively with the relational approach, it also discusses how these ideas can be extended to hierarchical and network databases.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "constraints; data base systems; program conversion; relational database", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Data manipulation languages (DML)}", } @Article{Chamberlin:1982:HFC, author = "Donald D. Chamberlin", title = "On ``Human Factors Comparison of a Procedural and a Nonprocedural Query Language''", journal = j-TODS, volume = "7", number = "2", pages = "316--317", month = jun, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 10 12:45:59 1996", bibsource = "Database/Graefe.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "TODS technical correspondence", } @Article{Traiger:1982:TCD, author = "Irving L. Traiger and Jim Gray and Cesare A. Galtieri and Bruce G. 
Lindsay", title = "Transactions and Consistency in Distributed Database Systems", journal = j-TODS, volume = "7", number = "3", pages = "323--342", month = sep, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-3/p323-traiger/p323-traiger.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-3/p323-traiger/", abstract = "The concepts of transaction and of data consistency are defined for a distributed system. The cases of partitioned data, where fragments of a file are stored at multiple nodes, and replicated data, where a file is replicated at several nodes, are discussed. It is argued that the distribution and replication of data should be transparent to the programs which use the data. That is, the programming interface should provide location transparency, replica transparency, concurrency transparency, and failure transparency. Techniques for providing such transparencies are abstracted and discussed.\par By extending the notions of system schedule and system clock to handle multiple nodes, it is shown that a distributed system can be modeled as a single sequential execution sequence. This model is then used to discuss simple techniques for implementing the various forms of transparency.", acknowledgement = ack-nhfb, annote = "This paper is a easy-to-read introduction to required transparency in distributed database systems. 4 transparencies are chosen and explained here, namely location transparency, replication transparency, concurrency transparency, and failure transparency. The transaction model adapted by the paper is fully synchronous and 2 phase protocol is used to implement concurrency transparency. 
The paper proves that if all transaction executions are two-phase, any legal execution of the transactions by a distributed system will be equivalent to some serial execution of the transactions by a system consisting of a single node under the assumption that updates are synchronous. The paper introduces special node-associated clock to prove it. The paper also gives simple explanation about a protocol to implement failure transparency using logs and two-phase commit protocol.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; data partitioning; data replication; recovery; TODS data replication, data partitioning", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}", } @Article{Fagin:1982:SUR, author = "Ronald Fagin and Alberto O. Mendelzon and Jeffrey D. 
Ullman", title = "A Simplified Universal Relation Assumption and its Properties", journal = j-TODS, volume = "7", number = "3", pages = "343--360", month = sep, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68H05 (68B15)", MRnumber = "83k:68100", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/bibdb.bib; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-3/p343-fagin/p343-fagin.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-3/p343-fagin/", abstract = "One problem concerning the universal relation assumption is the inability of known methods to obtain a database scheme design in the general case, where the real-world constraints are given by a set of dependencies that includes embedded multivalued dependencies. We propose a simpler method of describing the real world, where constraints are given by functional dependencies and a single join dependency. The relationship between this method of defining the real world and the classical methods is exposed. We characterize in terms of hypergraphs those multivalued dependencies that are the consequence of a given join dependency. Also characterized in terms of hypergraphs are those join dependencies that are equivalent to a set of multivalued dependencies.", acknowledgement = ack-nhfb, annote = "Constraints are functional dependencies and a single join dependency.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "acyclic; database scheme; hypergraph; join dependency; multivalued dependency; relational database", subject = "Theory of Computation --- Mathematical Logic and Formal Languages --- Mathematical Logic (F.4.1); Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Graph algorithms}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Trees}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Normal forms}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Query formulation}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Klug:1982:DVD, author = "Anthony Klug and Rod Price", title = "Determining {View} dependencies using tableaux", journal = j-TODS, volume = "7", number = "3", pages = "361--380", month = sep, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68H05", MRnumber = "83k:68103", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-3/p361-klug/p361-klug.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-3/p361-klug/", abstract = "A relational database models some part of the real world by a set of relations and a set of constraints. The constraints model properties of the stored information and must be maintained true at all times. 
For views defined over physically stored (base) relations, this is done by determining whether the view constraints are logical consequences of base relation constraints. A technique for determining such valid view constraints is presented in this paper. A generalization of the tableau chase is used. The idea of the method is to generate a tableau for the expression whose summary violates the test constraints in a ``canonical'' way. The chase then tries to remove this violation.\par It is also shown how this method has applications to schema design. Relations not in normal form or having other deficiencies can be replaced by normal form projections without losing the ability to represent all constraint information.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "chase; dependencies; rational algebra; relational model; tableaux; views TODS", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}", } @Article{Dayal:1982:CTU, author = "Umeshwar Dayal and Philip A. Bernstein", title = "On the Correct Translation of Update Operations on Relational Views", journal = j-TODS, volume = "7", number = "3", pages = "381--416", month = sep, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68H05", MRnumber = "83k:68099", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-3/p381-dayal/p381-dayal.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-3/p381-dayal/", abstract = "Most relational database systems provide a facility for supporting user views. 
Permitting this level of abstraction has the danger, however, that update requests issued by a user within the context of his view may not translate correctly into equivalent updates on the underlying database. The purpose of this paper is to formalize the notion of update translation and derive conditions under which translation procedures will produce correct translations of view updates.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "external schemata; relational databases; schema mapping; update translation; user views", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Data manipulation languages (DML)}; Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management --- Heterogeneous Databases (H.2.5): {\bf Program translation**}; Computing Methodologies --- Artificial Intelligence --- Automatic Programming (I.2.2): {\bf Program transformation}", } @Article{Griffith:1982:TPR, author = "Robert L. 
Griffith", title = "Three Principles of Representation for Semantic Networks", journal = j-TODS, volume = "7", number = "3", pages = "417--442", month = sep, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-3/p417-griffith/p417-griffith.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-3/p417-griffith/", abstract = "Semantic networks are so intuitive and easy to use that they are often employed without much thought as to the phenomenon of semantic nets themselves. Since they are becoming more and more a tool of artificial intelligence and now database technology, it is appropriate to focus on the principles of semantic nets. Such focus finds a harmonious and consistent base which can increase the semantic quality and usefulness of such nets. Three rules of representation are presented which achieve greater conceptual simplicity for users, simplifications in semantic net implementations and maintenance, and greater consistency across semantic net applications. These rules, applied to elements of the net itself, reveal how fundamental structures should be organized, and show that the common labeled-edge semantic net can be derived from a more primitive structure involving only nodes and membership relationships (and special nodes which represent names). Also, the correlation between binary and $n$-ary relations is presented.", acknowledgement = ack-nhfb, annote = "Semantic networks are employed without much thought. They are becoming a tool of artificial intelligences and database technology, principles of semantic nets. Three rules of representation are presented. 
The common labeled-edge semantic net can be derived from a more primitive structure involving only nodes and membership relationships.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", subject = "Data --- Data Structures (E.1): {\bf Graphs and networks}; Computing Methodologies --- Artificial Intelligence --- Knowledge Representation Formalisms and Methods (I.2.4): {\bf Semantic networks}", } @Article{Kim:1982:OSL, author = "Won Kim", title = "On Optimizing an {SQL-like} Nested Query", journal = j-TODS, volume = "7", number = "3", pages = "443--469", month = sep, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-3/p443-kim/p443-kim.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-3/p443-kim/", abstract = "SQL is a high-level nonprocedural data language which has received wide recognition in relational databases. One of the most interesting features of SQL is the nesting of query blocks to an arbitrary depth. An SQL-like query nested to an arbitrary depth is shown to be composed of five basic types of nesting. Four of them have not been well understood and more work needs to be done to improve their execution efficiency. Algorithms are developed that transform queries involving these basic types of nesting into semantically equivalent queries that are amenable to efficient processing by existing query-processing subsystems. These algorithms are then combined into a coherent strategy for processing a general nested query of arbitrary complexity.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "aggregate function; divide; join; nested query; predicate; relational database; SQL queries TODS", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Wong:1982:SAI, author = "Eugene Wong", title = "A Statistical Approach to Incomplete Information in Database Systems", journal = j-TODS, volume = "7", number = "3", pages = "470--488", month = sep, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68H05", MRnumber = "83k:68108", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/bibdb.bib; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-3/p470-wong/p470-wong.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-3/p470-wong/", abstract = "There are numerous situations in which a database cannot provide a precise answer to some of the questions that are posed. Sources of imprecision vary and include examples such as recording errors, incompatible scaling, and obsolete data. In many such situations, considerable prior information concerning the imprecision exists and can be exploited to provide valuable information for queries to which no exact answer can be given. The objective of this paper is to provide a framework for doing so.", acknowledgement = ack-nhfb, annote = "Sources of imprecision include recording errors, incompatible scaling, and obsolete data. In many situations considerable prior information concerning the imprecision exists and can be exploited. This paper provides a framework. Null values.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "incomplete information; missing values; null values; TODS null values", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}", } @Article{Zaniolo:1982:NNF, author = "Carlo Zaniolo", title = "A New Normal Form for the Design of Relational Database Schemata", journal = j-TODS, volume = "7", number = "3", pages = "489--499", month = sep, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68H05 (68B15)", MRnumber = "83k:68109", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-3/p489-zaniolo/p489-zaniolo.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-3/p489-zaniolo/", abstract = "This paper addresses the problem of database schema design in the framework of the relational data model and functional dependencies. It suggests that both Third Normal Form (3NF) and Boyce-Codd Normal Form (BCNF) supply an inadequate basis for relational schema design. The main problem with 3NF is that it is too forgiving and does not enforce the separation principle as strictly as it should. On the other hand, BCNF is incompatible with the principle of representation and prone to computational complexity. Thus a new normal form, which lies between these two and captures the salient qualities of both is proposed. The new normal form is stricter than 3NF, but it is still compatible with the representation principle. First a simpler definition of 3NF is derived, and the analogy of this new definition to the definition of BCNF is noted. 
This analogy is used to derive the new normal form. Finally, it is proved that Bernstein's algorithm for schema design synthesizes schemata that are already in the new normal form.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database schema; functional dependencies; relational model", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Normal forms}", } @Article{Lam:1982:CSA, author = "K. Lam and C. T. Yu", title = "A Clustered Search Algorithm Incorporating Arbitrary Term Dependencies", journal = j-TODS, volume = "7", number = "3", pages = "500--508", month = sep, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68H05", MRnumber = "83k:68104", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/bibdb.bib; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-3/p500-lam/p500-lam.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-3/p500-lam/", abstract = "The documents in a database are organized into clusters, where each cluster contains similar documents and a representative of these documents. A user query is compared with all the representatives of the clusters, and on the basis of such comparisons, those clusters having many {\em close neighbors\/} with respect to the query are selected for searching. This paper presents an estimation of the number of close neighbors in a cluster in relation to the given query. The estimation takes into consideration the dependencies between terms. It is demonstrated by experiments that the estimate is accurate and the time to generate the estimate is small.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Bahadur-Lazarsfeld expansion; clustered search; CTYU TODS; generating polynomial; term dependencies", subject = "Theory of Computation --- Analysis of Algorithms and Problem Complexity --- General (F.2.0); Mathematics of Computing --- Discrete Mathematics --- Combinatorics (G.2.1): {\bf Combinatorial algorithms}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Clustering}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Retrieval models}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Search process}", } @Article{Batory:1982:UMP, author = "D. S. Batory and C. C. Gotlieb", title = "A Unifying Model of Physical Databases", journal = j-TODS, volume = "7", number = "4", pages = "509--539", month = dec, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-4/p509-batory/p509-batory.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-4/p509-batory/", abstract = "A unifying model for the study of database performance is proposed. 
Applications of the model are shown to relate and extend important work concerning batched searching, transposed files, index selection, dynamic hash-based files, generalized access path structures, differential files, network databases, and multifile query processing.", acknowledgement = ack-nhfb, annote = "See also \cite{Piwowarski:1985:CBS}.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems, TODS decomposition; decomposition; linksets; simple files; unifying model", subject = "Information Systems --- Database Management --- Physical Design (H.2.2)", } @Article{Aghili:1982:PGD, author = "Houtan Aghili and Dennis G. Severance", title = "Practical Guide to the Design of Differential Files for Recovery of On-Line Databases", journal = j-TODS, volume = "7", number = "4", pages = "540--565", month = dec, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-4/p540-aghili/p540-aghili.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-4/p540-aghili/", abstract = "The concept of a differential file has previously been proposed as an efficient means of collecting database updates for on-line systems. This paper studies the problem of database backup and recovery for such systems, and presents an analytic model of their operation. Five key design decisions are identified and an optimization procedure for each is developed. 
A design algorithm that quickly provides parameters for a near-optimal differential file architecture is provided.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "backup and recovery; data processing; database maintenance; database systems; differential files; hashing functions; numerical methods; optimization; reorganization", subject = "Data --- Data Storage Representations (E.2); Mathematics of Computing --- Numerical Analysis (G.1); Information Systems --- Database Management --- Physical Design (H.2.2); Information Systems --- Database Management --- Database Administration (H.2.7)", } @Article{Larson:1982:PAL, author = "Per-{\AA}ke Larson", title = "Performance Analysis of Linear Hashing with Partial Expansions", journal = j-TODS, volume = "7", number = "4", pages = "566--587", month = dec, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-4/p566-larson/p566-larson.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-4/p566-larson/", abstract = "Linear hashing with partial expansions is a new file organization primarily intended for files which grow and shrink dynamically. This paper presents a mathematical analysis of the expected performance of the new scheme. The following performance measures are considered: length of successful and unsuccessful searches, accesses required to insert or delete a record, and the size of the overflow area. The performance is cyclical. 
For all performance measures, the necessary formulas are derived for computing the expected performance at any point of a cycle and the average over a cycle. Furthermore, the expected worst case in connection with searching is analyzed. The overall performance depends on several file parameters. The numerical results show that for many realistic parameter combinations the performance is expected to be extremely good. Even the longest search is expected to be of quite reasonable length.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "dynamic hashing schemes; extendible hashing; hashing; linear hashing; TODS dynamic hashing, extendible hashing, data processing", subject = "Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Sorting and searching}; Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}", } @Article{Babb:1982:JNF, author = "E. 
Babb", title = "Joined Normal Form: a Storage Encoding for Relational Databases", journal = j-TODS, volume = "7", number = "4", pages = "588--614", month = dec, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-4/p588-babb/p588-babb.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-4/p588-babb/", abstract = "A new on-line query language and storage structure for a database machine is presented. By including a mathematical model in the interpreter the query language has been substantially simplified so that no reference to relation names is necessary. By storing the model as a single joined normal form (JNF) file, it has been possible to exploit the powerful search capability of the Content Addressable File Store (CAFS; CAFS is a registered trademark of International Computers Limited) database machine.", acknowledgement = ack-nhfb, annote = "prejoining for CAFS.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "CAFS; content addressing hardware; database systems; functional dependencies; implication network; joined normal form; joins; mathematical model; network; queries; relational database; storage encoding tags; storage encoding, TODS CAFS, third normal form; third normal form; updates", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Search process}", } @Article{Heyman:1982:MMD, author = "Daniel P. Heyman", title = "Mathematical Models of Database Degradation", journal = j-TODS, volume = "7", number = "4", pages = "615--631", month = dec, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-4/p615-heyman/p615-heyman.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-4/p615-heyman/", abstract = "As data are updated, the initial physical structure of a database is changed and retrieval of specific pieces of data becomes more time consuming. This phenomenon is called database degradation. In this paper two models of database degradation are described. Each model refers to a different aspect of the problem.\par It is assumed that transactions are statistically independent and either add, delete, or update data. The first model examines the time during which a block of data is filling up. 
The second model examines the overflows from a block of data, which essentially describes the buildup of disorganization. Analytical results are obtained for both models. In addition, several numerical examples are presented which show that the mean number of overflows grows approximately linearly with time. This approximation is used to devise a simple formula for the optimal time to reorganize a stochastically growing database.", acknowledgement = ack-nhfb, classification = "723; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data overflows; database degradation, TODS data overflows; database systems; file organization; mathematical models", subject = "Computer Systems Organization --- Performance of Systems (C.4): {\bf Modeling techniques}; Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Logging and recovery}", } @Article{Korth:1982:DFU, author = "Henry F. Korth", title = "Deadlock Freedom Using Edge Locks", journal = j-TODS, volume = "7", number = "4", pages = "632--652", month = dec, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-4/p632-korth/p632-korth.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-4/p632-korth/", abstract = "We define a series of locking protocols for database systems that all have three main features: freedom from deadlock, multiple granularity, and support for general collections of locking primitives. A rooted directed acyclic graph is used to represent multiple granularities, as in System R. 
Deadlock freedom is guaranteed by extending the System R protocol to require locks on edges of the graph in addition to the locks required on nodes.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; database systems; locking; serializability", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}", } @Article{Goodman:1982:TQS, author = "Nathan Goodman and Oded Shmueli", title = "Tree Queries: a Simple Class of Relational Queries", journal = j-TODS, volume = "7", number = "4", pages = "653--677", month = dec, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-4/p653-goodman/p653-goodman.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-4/p653-goodman/", abstract = "One can partition the class of relational database schemas into tree schemas and cyclic schemas. (These are called acyclic hypergraphs and cyclic hypergraphs elsewhere in the literature.) This partition has interesting implications in query processing, dependency theory, and graph theory.\par The tree/cyclic partitioning of database schemas originated with a similar partition of equijoin queries. Given an arbitrary equijoin query one can obtain an equivalent query that calculates the natural join of all relations in (an efficiently) derived database; such a query is called a natural join (NJ) query. 
If the derived database is a tree schema the original query is said to be a tree query, and otherwise a cyclic query.\par In this paper we analyze query processing consequences of the tree/cyclic partitioning. We are able to argue, qualitatively, that queries which imply a tree schema are easier to process than those implying a cyclic schema. Our results also extend the study of the semijoin operator.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "acyclic schemes; cyclic schemas; database systems; join; semijoin; tree queries; tree schemas", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Kerschberg:1982:QOS, author = "Larry Kerschberg and Peter D. Ting and S. Bing Yao", title = "Query Optimization in Star Computer Networks", journal = j-TODS, volume = "7", number = "4", pages = "678--711", month = dec, year = "1982", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1982-7-4/p678-kerschberg/p678-kerschberg.pdf; http://www.acm.org/pubs/citations/journals/tods/1982-7-4/p678-kerschberg/", abstract = "Query processing is investigated for relational databases distributed over several computers organized in a star network. Minimal response-time processing strategies are presented for queries involving the select, project, and join commands. 
These strategies depend on system parameters such as communication costs and different machine processing speeds; database parameters such as relation cardinality and file size; and query parameters such as estimates of the size and number of tuples in the result relation. The optimal strategies specify relation preparation processes, the shipping strategy, serial or parallel processing, and, where applicable, the site of join filtering and merging. Strategies for optimizing select and join queries have been implemented and tested.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer networks; database systems; query optimization; relational database system; star computer network", subject = "Computer Systems Organization --- Computer-Communication Networks --- Distributed Systems (C.2.4): {\bf Distributed applications}; Computer Systems Organization --- Computer-Communication Networks --- Distributed Systems (C.2.4): {\bf Distributed databases}; Computer Systems Organization --- Performance of Systems (C.4): {\bf Design studies}; Computer Systems Organization --- Performance of Systems (C.4): {\bf Modeling techniques}; Software --- Operating Systems --- File Systems Management (D.4.3): {\bf Distributed file systems}; Software --- Operating Systems --- Organization and Design (D.4.7): {\bf Distributed systems}; Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}", } @Article{Maier:1983:MOS, author = "David Maier and Jeffrey D. 
Ullman", title = "Maximal Objects and the Semantics of Universal Relation Databases", journal = j-TODS, volume = "8", number = "1", pages = "1--14", month = mar, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", note = "Also published in/as: SUNY, Stony Brook, CS, TR 80/016, 1980.", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-1/p1-maier/p1-maier.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-1/p1-maier/", abstract = "The universal relation concept is intended to provide the database user with a simplified model in which he can compose queries without regard to the underlying structure of the relations in the database. Frequently, the lossless join criterion provides the query interpreter with the clue needed to interpret the query as the user intended. However, some examples exist where interpretation by the lossless-join rule runs contrary to our intuition. To handle some of these cases, we propose a concept called {\em maximal objects}, which modifies the universal relation concept in exactly those situations where it appears to go awry --- when the underlying relational structure has ``cycles.'' We offer examples of how the maximal object concept provides intuitively correct interpretations. We also consider how one might construct maximal objects mechanically from purely syntactic structural information --- the relation schemes and functional dependencies --- about the database.", acknowledgement = ack-nhfb, annote = "A universal relation is represented by a hypergraph. 
If the hypergraph is cyclic, some queries can be evaluated in different ways; restricting navigation to few acyclic components (maximal objects) gives intuitively correct answers.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "acyclic hypergraph; database systems; relational database; universal relation", subject = "Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Path and circuit problems}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Data description languages (DDL)}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Data manipulation languages (DML)}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Haskin:1983:OCH, author = "Roger L. Haskin and Lee A. Hollaar", title = "Operational Characteristics of a Hardware-Based Pattern Matcher", journal = j-TODS, volume = "8", number = "1", pages = "15--40", month = mar, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-1/p15-haskin/p15-haskin.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-1/p15-haskin/", abstract = "The design and operation of a new class of hardware-based pattern matchers, such as would be used in a backended database processor in a full-text or other retrieval system, is presented. 
This recognizer is based on a unique implementation technique for finite state automata consisting of partitioning the state table among a number of simple digital machines. It avoids the problems generally associated with implementing finite state machines, such as large state table memories, complex control mechanisms, and state encodings. Because it consists primarily of memory, with its high regularity and density, needs only limited static interconnections, and operates at a relatively low speed, it can be easily constructed using integrated circuit techniques.\par After a brief discussion of other pattern-matching hardware, the structure and operation of the partitioned finite state automaton is given, along with a simplified discussion of how the state tables are partitioned. The expected performance of the resulting system and the state table partitioning programs is then discussed.", acknowledgement = ack-nhfb, classification = "723; 901", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "backend processors; computer system architecture; database systems; finite state automata; full text retrieval systems; information science; text searching", subject = "Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Cellular arrays and automata}; Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Algorithms implemented in hardware}; Information Systems --- Database Management --- Database Machines (H.2.6); Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Search process}", } @Article{Sicherman:1983:AQR, author = "George L. Sicherman and Wiebren {De Jonge} and Reind P. 
{Van De Riet}", title = "Answering Queries without Revealing Secrets", journal = j-TODS, volume = "8", number = "1", pages = "41--59", month = mar, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", note = "Also published in/as: reprinted in deJonge thesis, Jun. 1985.", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-1/p41-sicherman/p41-sicherman.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-1/p41-sicherman/", abstract = "Question-answering systems must often keep certain information secret. This can be accomplished, for example, by sometimes refusing to answer a query. Here the danger of revealing a secret by refusing to answer a query is investigated. First several criteria that can be used to decide whether or not to answer a query are developed. Then it is shown which of these criteria are safe if the questioner knows nothing at all about what is kept secret. Furthermore, it is proved that one of these criteria is safe even if the user of the system knows which information is to be kept secret.", acknowledgement = ack-nhfb, acmcrnumber = "8404-296", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems, statistical security TODS; keeping secrets; refusal to answer; strategy", subject = "Information Systems --- Models and Principles --- Systems and Information Theory (H.1.1): {\bf Value of information}; Information Systems --- Models and Principles --- User/Machine Systems (H.1.2); Information Systems --- Information Storage and Retrieval --- Systems and Software (H.3.4): {\bf Current awareness systems (selective dissemination of information--SDI)**}; Information Systems --- Information Storage and Retrieval --- Systems and Software (H.3.4): {\bf Question-answering (fact retrieval) systems**}; Computing Methodologies --- Artificial Intelligence --- Deduction and Theorem Proving (I.2.3): {\bf Answer/reason extraction}", } @Article{deJonge:1983:CSD, author = "Wiebren de Jonge", title = "Compromising Statistical Databases Responding to Queries About Means", journal = j-TODS, volume = "8", number = "1", pages = "60--80", month = mar, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", note = "Also published in/as: reprinted in Jun. 1985 thesis", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-1/p60-de_jonge/p60-de_jonge.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-1/p60-de_jonge/", abstract = "This paper describes how to compromise a statistical database which only answers queries about arithmetic means for query sets whose cardinality falls in the range $ [k, N - k] $, for some $ k > 0 $, where $ N \geq 2 k $ is the number of records in the database. 
The compromise is shown to be easy and to require only a little preknowledge; knowing the cardinality of just one nonempty query set is usually sufficient.\par This means that not only count and sum queries, but also queries for arithmetic means can be extremely dangerous for the security of a statistical database, and that this threat must be taken into account explicitly by protective measures. This seems quite important from a practical standpoint: while arithmetic means were known for some time to be not altogether harmless, the (perhaps surprising) extent of the threat is now shown.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "compromise; database security; database systems; security TODS; statistical databases", subject = "Information Systems --- Models and Principles --- Systems and Information Theory (H.1.1): {\bf Value of information}; Information Systems --- Models and Principles --- User/Machine Systems (H.1.2); Information Systems --- Information Storage and Retrieval --- Systems and Software (H.3.4): {\bf Question-answering (fact retrieval) systems**}", } @Article{Graham:1983:FD, author = "Marc H. 
Graham", title = "Functions in Databases", journal = j-TODS, volume = "8", number = "1", pages = "81--109", month = mar, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "85a:68036", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-1/p81-graham/p81-graham.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-1/p81-graham/", abstract = "We discuss the objectives of including functional dependencies in the definition of a relational database. We find two distinct objectives. The appearance of a dependency in the definition of a database indicates that the states of the database are to encode a function. A method based on the chase of calculating the function encoded by a particular state is given and compared to methods utilizing derivations of the dependency. A test for deciding whether the states of a schema may encode a nonempty function is presented as is a characterization of the class of schemas which are capable of encoding nonempty functions for all the dependencies in the definition. This class is the class of dependency preserving schemas as defined by Beeri et al. and is strictly larger than the class presented by Bernstein.\par The second objective of including a functional dependency in the definition of a database is that the dependency be capable of constraining the states of the database; that is, capable of uncovering input errors made by the users. We show that this capability is weaker than the first objective; thus, even dependencies whose functions are everywhere empty may still act as constraints. Bounds on the requirements for a dependency to act as a constraint are derived. 
\par These results are founded on the notion of a weak instance for a database state, which replaces the universal relation instance assumption and is both intuitively and computationally more nearly acceptable.", acknowledgement = ack-nhfb, annote = "Method based on the chase of calculating the function is given; the dependency should constrain the states of the database; many algorithms.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "functional dependencies; tableaux; TODS functional dependencies, tableaux, database systems", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Normal forms}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}", } @Article{Katz:1983:RCG, author = "R. H. Katz and E. Wong", title = "Resolving Conflicts in Global Storage Design Through Replication", journal = j-TODS, volume = "8", number = "1", pages = "110--135", month = mar, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-1/p110-katz/p110-katz.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-1/p110-katz/", abstract = "We present a conceptual framework in which a database's intra- and interrecord set access requirements are specified as a constrained assignment of abstract characteristics (``evaluated,'' ``indexed,'' ``clustered,'' ``well-placed'') to logical access paths. We derive a physical schema by choosing an available storage structure that most closely provides the desired access characteristics. 
We use explicit replication of schema objects to reduce the access cost along certain paths, and analyze the trade-offs between increased update overhead and improved retrieval access. Finally, we give an algorithm to select storage structures for a CODASYL 78 DBTG schema, given its access requirements specification.", acknowledgement = ack-nhfb, annote = "Access path data model deduced from the Entity-Relationship Model. Each function is augmented with access characteristics, evaluated, indexed, clustered, and well-placed.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "access path selection; database systems, TODS functional data model; functional data model; storage structure choice", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}", } @Article{Lomet:1983:BIE, author = "David B. Lomet", title = "Bounded Index Exponential Hashing", journal = j-TODS, volume = "8", number = "1", pages = "136--165", month = mar, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-1/p136-lomet/p136-lomet.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-1/p136-lomet/", abstract = "Bounded index exponential hashing, a new form of extendible hashing, is described. It has the important advantages over most of the other extendible hashing variants of both (i) providing random access to any record of a file in close to one disk access and (ii) having performance which does not vary with file size.
It is straightforward to implement and demands only a fixed and specifiable amount of main storage to achieve this performance. Its underlying physical disk storage is readily managed and record overflow is handled so as to insure that unsuccessful searches never take more than two accesses. The method's ability to access data in close to a single disk access makes it possible to organize a database, in which files have a primary key and multiple secondary keys, such that the result is a significant performance advantage over existing organizations.", acknowledgement = ack-nhfb, classification = "722", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing, TODS extendible hashing; extendible hashing; tree index methods", subject = "Software --- Operating Systems --- File Systems Management (D.4.3): {\bf File organization}; Data --- Data Storage Representations (E.2): {\bf Hash-table representations}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}", } @Article{Stonebraker:1983:PER, author = "Michael Stonebraker and John Woodfill and Jeff Ranstrom and Marguerite Murphy and Marc Meyer and Eric Allman", title = "Performance Enhancements to a Relational Database System", journal = j-TODS, volume = "8", number = "2", pages = "167--185", month = jun, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-2/p167-stonebraker/p167-stonebraker.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-2/p167-stonebraker/", abstract = "In this paper we examine four performance
enhancements to a database management system: dynamic compilation, microcoded routines, a special-purpose file system, and a special-purpose operating system. All were examined in the context of the INGRES database management system. Benchmark timings that are included suggest the attractiveness of dynamic compilation and a special-purpose file system. Microcode and a special-purpose operating system are analyzed and appear to be of more limited utility in the INGRES context.", acknowledgement = ack-nhfb, annote = "Estimates are given for compilation, micro-coding, a file system which supports locality, and a specialized operating system for INGRES.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "compiled query languages; database performance; database systems, TODS dynamic compilation microcode special purpose file operating system; file systems for databases; microcode", subject = "Information Systems --- Database Management --- General (H.2.0); Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Database Management --- Database Machines (H.2.6)", } @Article{Garcia-Molina:1983:USK, author = "H{\'e}ctor Garc{\'\i}a-Molina", title = "Using Semantic Knowledge for Transaction Processing in a Distributed Database", journal = j-TODS, volume = "8", number = "2", pages = "186--213", month = jun, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-2/p186-garcia-molina/p186-garcia-molina.pdf; 
http://www.acm.org/pubs/citations/journals/tods/1983-8-2/p186-garcia-molina/", abstract = "This paper investigates how the semantic knowledge of an application can be used in a distributed database to process transactions efficiently and to avoid some of the delays associated with failures. The main idea is to allow nonserializable schedules which preserve consistency and which are acceptable to the system users. To produce such schedules, the transaction processing mechanism receives semantic information from the users in the form of transaction semantic types, a division of transactions into steps, compatibility sets, and countersteps. Using these notions, we propose a mechanism which allows users to exploit their semantic knowledge in an organized fashion. The strengths and weaknesses of this approach are discussed.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; consistency; database systems; locking; schedule; semantic knowledge; serializability", subject = "Computer Systems Organization --- Computer-Communication Networks --- Distributed Systems (C.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}", } @Article{Clifford:1983:FST, author = "James Clifford and David S. 
Warren", title = "Formal Semantics for Time in Databases", journal = j-TODS, volume = "8", number = "2", pages = "214--254", month = jun, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Ai/ai.misc.bib; Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-2/p214-clifford/p214-clifford.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-2/p214-clifford/", abstract = "The concept of a historical database is introduced as a tool for modeling the dynamic nature of some part of the real world. Just as first-order logic has been shown to be a useful formalism for expressing and understanding the underlying semantics of the relational database model, intensional logic is presented as an analogous formalism for expressing and understanding the temporal semantics involved in a historical database. The various components of the relational model, as extended to include historical relations, are discussed in terms of the model theory for the logic IL$_s$, a variation of the logic IL formulated by Richard Montague. The modal concepts of intensional and extensional data constraints and queries are introduced and contrasted. Finally, the potential application of these ideas to the problem of natural language database querying is discussed.", acknowledgement = ack-nhfb, annote = "all timeslices are represented. No inference needed. Storage could be huge. Uses term historical db versus temporal db. Intensional Montague logic. Two timestamps: `state' and `exist'", classification = "723", ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems; entity-relationship model; historical databases; intensional logic; relational database; temporal semantics", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}", } @Article{Bitton:1983:DRE, author = "Dina Bitton and David J. DeWitt", title = "Duplicate Record Elimination in Large Data Files", journal = j-TODS, volume = "8", number = "2", pages = "255--265", month = jun, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-2/p255-bitton/p255-bitton.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-2/p255-bitton/", abstract = "The issue of duplicate elimination for large data files in which many occurrences of the same record may appear is addressed. A comprehensive cost analysis of the duplicate elimination operation is presented. This analysis is based on a combinatorial model developed for estimating the size of intermediate runs produced by a modified merge-sort procedure. The performance of this modified merge-sort procedure is demonstrated to be significantly superior to the standard duplicate elimination technique of sorting followed by a sequential pass to locate duplicate records. The results can also be used to provide critical input to a query optimizer in a relational database system.", acknowledgement = ack-nhfb, annote = "use a modified sort-merge.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems, early aggregation TODS; duplicate elimination; projection operator; sorting", subject = "Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Sagiv:1983:CGC, author = "Yehoshua Sagiv", title = "A Characterization of Globally Consistent Databases and Their Correct Access Paths", journal = j-TODS, volume = "8", number = "2", pages = "266--286", month = jun, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Ai/prolog.1.bib; Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-2/p266-sagiv/p266-sagiv.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-2/p266-sagiv/", abstract = "The representative instance is proposed as a representation of the data stored in a database whose relations are not the projections of a universal instance. Database schemes are characterized for which local consistency implies global consistency. (Local consistency means that each relation satisfies its own functional dependencies; global consistency means that the representative instance satisfies all the functional dependencies). A method of efficiently computing projections of the representative instance is given, provided that local consistency implies global consistency. 
Throughout, it is assumed that a cover of the functional dependencies is embodied in the database scheme in the form of keys.", acknowledgement = ack-nhfb, annote = "Inter-relational consistency based on FD's", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "chase; database systems, TODS chase, universal relation scheme, extension join, relational algebra; extension join; functional dependency; null value; prolog; relational algebra; relational database; representative instance; universal relation scheme", subject = "Theory of Computation --- Mathematical Logic and Formal Languages --- Mathematical Logic (F.4.1); Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Normal forms}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Query formulation}", } @Article{Ullman:1983:CTJ, author = "Jeffrey D. Ullman", title = "Corrigendum: The Theory of Joins in Relational Databases", journal = j-TODS, volume = "8", number = "2", pages = "287--287", month = jun, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibsource = "Database/Graefe.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See \cite{Aho:1979:TJR}.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cardenas:1983:PRA, author = "Alfonso F. 
Cardenas and Farid Alavian and Algirdas Avizienis", title = "Performance of Recovery Architectures in Parallel Associative Database Processors", journal = j-TODS, volume = "8", number = "3", pages = "291--323", month = sep, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-3/p291-cardenas/p291-cardenas.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-3/p291-cardenas/", abstract = "The need for robust recovery facilities in modern database management systems is quite well known. Various authors have addressed recovery facilities and specific techniques, but none have delved into the problem of recovery in database machines. In this paper, the types of undesirable events that occur in a database environment are classified and the necessary recovery information, with subsequent actions to recover the correct state of the database, is summarized. A model of the ``processor-per-track'' class of parallel associative database processor is presented. Three different types of recovery mechanisms that may be considered for parallel associative database processors are identified. For each architecture, both the workload imposed by the recovery mechanisms on the execution of database operations (i.e., retrieve, modify, delete, and insert) and the workload involved in the recovery actions (i.e., rollback, restart, restore, and reconstruct) are analyzed. The performance of the three architectures is quantitatively compared. This comparison is made in terms of the number of extra revolutions of the database area required to process a transaction versus the number of records affected by a transaction. 
A variety of different design parameters of the database processor, of the database, and of a mix of transaction types (modify, insert, and delete) are considered. A large number of combinations is selected and the effects of the parameters on the extra processing time are identified.", acknowledgement = ack-nhfb, annote = "three methods of recovery in logic-per-track processors are analyzed.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "associative database processors; database systems, hardware support machine TODS", subject = "Computer Systems Organization --- Performance of Systems (C.4): {\bf Performance attributes}; Information Systems --- Database Management --- Database Machines (H.2.6); Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Logging and recovery}", } @Article{Bitton:1983:PAE, author = "Dina Bitton and Haran Boral and David J. DeWitt and W. Kevin Wilkinson", title = "Parallel Algorithms for the Execution of Relational Database Operations", journal = j-TODS, volume = "8", number = "3", pages = "324--353", month = sep, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-3/p324-bitton/p324-bitton.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-3/p324-bitton/", abstract = "This paper presents and analyzes algorithms for parallel processing of relational database operations in a general multiprocessor framework. 
To analyze alternative algorithms, we introduce an analysis methodology which incorporates I/O, CPU, and message costs and which can be adjusted to fit different multiprocessor architectures. Algorithms are presented and analyzed for sorting, projection, and join operations. While some of these algorithms have been presented and analyzed previously, we have generalized each in order to handle the case where the number of pages is significantly larger than the number of processors. In addition, we present and analyze algorithms for the parallel execution of update and aggregate operations.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "aggregate operations; database machines; database systems; join operation; parallel processing; projection operator; sorting", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Database Management --- Database Machines (H.2.6)", } @Article{Eager:1983:ARD, author = "Derek L. Eager and Kenneth C. 
Sevcik", title = "Achieving Robustness in Distributed Database Systems", journal = j-TODS, volume = "8", number = "3", pages = "354--381", month = sep, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/fault.tolerant.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-3/p354-eager/p354-eager.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-3/p354-eager/", abstract = "The problem of concurrency control in distributed database systems in which site and communication link failures may occur is considered. The possible range of failures is not restricted; in particular, failures may induce an arbitrary network partitioning. It is desirable to attain a high ``level of robustness'' in such a system; that is, these failures should have only a small impact on system operation.\par A level of robustness termed {\em maximal partial operability\/} is identified. Under our models of concurrency control and robustness, this robustness level is the highest level attainable without significantly degrading performance.\par A basis for the implementation of maximal partial operability is presented. To illustrate its use, it is applied to a distributed locking concurrency control method and to a method that utilizes timestamps. When no failures are present, the robustness modifications for these methods induce no significant additional overhead.", acknowledgement = ack-nhfb, annote = "Three phases: 1. read and write to a private workspace, 2. indicate intention to commit, restart, or abort, 3. if verified complete actual transaction. Intention to update from phase 2 is withdrawn if abort or restart is the end result of phase 2. 
Two versions are presented, either the data are transmitted in phase 3, or, if handled as phase 2 of two-phase commit protocol, the data are held in secure storage from phase 2 to phase 3. In case of partitioning, voting (ref. Gifford) is used, but to prevent broad lockouts, those transactions which cannot update all copies must post this failure at a quorum of nodes, so that successor transactions can test that they do not conflict, and do not enter phase 3. Dangling precommits may be resolved by checking other nodes in the partition. On re-establishing the net, acyclicness is created by possibly restarting transactions which led to a cycle in the combined schedule.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; database systems; network partitioning; robustness; serializability", subject = "Information Systems --- Database Management --- Physical Design (H.2.2); Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management --- Database Administration (H.2.7)", } @Article{Trueblood:1983:MMM, author = "Robert P. Trueblood and H. Rex Hartson and Johannes J. 
Martin", title = "{MULTISAFE} --- {A} Modular Multiprocessing Approach to Secure Database Management", journal = j-TODS, volume = "8", number = "3", pages = "382--409", month = sep, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-3/p382-trueblood/p382-trueblood.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-3/p382-trueblood/", abstract = "This paper describes the configuration and intermodule communication of a MULTImodule system for supporting Secure Authorization with Full Enforcement (MULTISAFE) for database management. A modular architecture is described which provides secure, controlled access to shared data in a multiuser environment, with low performance penalties, even for complex protection policies. The primary mechanisms are structured and verifiable. The entire approach is immediately extendible to distributed protection of distributed data. The system includes a user and applications module (UAM), a data storage and retrieval module (SRM), and a protection and security module (PSM). The control of intermodule communication is based on a data abstraction approach, initially described in terms of function invocations. An implementation within a formal message system is then described. The discussion of function invocations begins with the single terminal case and extends to the multiterminal case. Some physical implementation aspects are also discussed, and some examples of message sequences are given.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "abstract data types; access control; back-end database; database systems; intermodule communication; secure database; TODS", subject = "Computer Systems Organization --- Processor Architectures --- Multiple Data Stream Architectures (Multiprocessors) (C.1.2); Software --- Operating Systems --- Storage Management (D.4.2); Software --- Operating Systems --- Security and Protection (D.4.6); Information Systems --- Database Management (H.2); Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management --- Database Machines (H.2.6)", } @Article{Ito:1983:HFO, author = "Tetsuro Ito and Makoto Kizawa", title = "Hierarchical File Organization and its Application to Similar-String Matching", journal = j-TODS, volume = "8", number = "3", pages = "410--433", month = sep, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/spell.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-3/p410-ito/p410-ito.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-3/p410-ito/", abstract = "The automatic correction of misspelled inputs is discussed from a viewpoint of similar-string matching. First a hierarchical file organization based on a linear ordering of records is presented for retrieving records highly similar to any input query. Then the spelling problem is attacked by constructing a hierarchical file for a set of strings in a dictionary of English words. 
The spelling correction steps proceed as follows: (1) find one of the best-match strings which are most similar to a query, (2) expand the search area for obtaining the good-match strings, and (3) interrupt the file search as soon as the required string is displayed. Computational experiments verify the performance of the proposed methods for similar-string matching under the UNIX time-sharing system.", acknowledgement = ack-nhfb, annote = "A spelling checker to provide possible correct spellings for all possible words. Results are quite sketchy", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "best match; data processing, algorithms; experimentation; file organization; good match; hierarchical clustering; linear ordering; measurement; office automation; performance; similar-string; similarity; spelling correction; text editor; theory; verification", review = "ACM CR 8408-0665", subject = "I.2 Computing Methodologies, ARTIFICIAL INTELLIGENCE, Natural Language Processing \\ I.5.4 Computing Methodologies, PATTERN RECOGNITION, Applications, Text processing \\ E.5 Data, FILES, Organization/structure \\ H.3.2 Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization \\ H.3.3 Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Search process \\ H.3.3 Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Selection process \\ H.4 Information Systems, INFORMATION SYSTEMS APPLICATIONS, Office Automation", } @Article{Kolodner:1983:IRS, author = "Janet L. 
Kolodner", title = "Indexing and Retrieval Strategies for Natural Language Fact Retrieval", journal = j-TODS, volume = "8", number = "3", pages = "434--464", month = sep, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Ai/ai.misc.bib; Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-3/p434-kolodner/p434-kolodner.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-3/p434-kolodner/", abstract = "Researchers in artificial intelligence have recently become interested in natural language fact retrieval; currently, their research is at a point where it can begin contributing to the field of Information Retrieval. In this paper, strategies for a natural language fact retrieval system are mapped out, and approaches to many of the organization and retrieval problems are presented. The CYRUS system, which keeps track of important people and is queried in English, is presented and used to illustrate those solutions.", acknowledgement = ack-nhfb, classification = "723; 901", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "artificial intelligence; conceptual memory; database retrieval; fact retrieval; information science; natural language processing; question answering", subject = "Information Systems --- Information Storage and Retrieval --- Content Analysis and Indexing (H.3.1); Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3); Computing Methodologies --- Artificial Intelligence --- Knowledge Representation Formalisms and Methods (I.2.4)", } @Article{Bernstein:1983:MCC, author = "Philip A. 
Bernstein and Nathan Goodman", title = "Multiversion Concurrency Control --- Theory and Algorithms", journal = j-TODS, volume = "8", number = "4", pages = "465--483", month = dec, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "86m:68025", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/Discrete.event.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-4/p465-bernstein/p465-bernstein.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-4/p465-bernstein/", abstract = "Concurrency control is the activity of synchronizing operations issued by concurrently executing programs on a shared database. The goal is to produce an execution that has the same effect as a serial (noninterleaved) one. In a multiversion database system, each write on a data item produces a new copy (or {\em version\/}) of that data item. This paper presents a theory for analyzing the correctness of concurrency control algorithms for multiversion database systems. We use the theory to analyze some new algorithms and some previously published ones.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- Algorithms; database systems; transaction processing", subject = "Information Systems --- Database Management --- Systems (H.2.4)", } @Article{Lynch:1983:MAN, author = "Nancy A. 
Lynch", title = "Multilevel Atomicity --- {A} New Correctness Criterion for Database Concurrency Control", journal = j-TODS, volume = "8", number = "4", pages = "484--502", month = dec, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "86j:68022", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-4/p484-lynch/p484-lynch.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-4/p484-lynch/", abstract = "{\em Multilevel atomicity}, a new correctness criteria for database concurrency control, is defined. It weakens the usual notion of serializability by permitting controlled interleaving among transactions. It appears to be especially suitable for applications in which the set of transactions has a natural hierarchical structure based on the hierarchical structure of an organization. A characterization for multilevel atomicity, in terms of the absence of cycles in a dependency relation among transaction steps, is given. Some remarks are made concerning implementation.", acknowledgement = ack-nhfb, annote = "A weaker level of concurrency control than transaction serializability, a generalization of Garc{\'\i}a-Molina [1981].", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "atomicity; breakpoint; database systems; transaction", subject = "Software --- Programming Languages --- Language Constructs and Features (D.3.3): {\bf Concurrent programming structures}", } @Article{Hecht:1983:SMF, author = "Matthew S. Hecht and John D. 
Gabbe", title = "Shadowed Management of Free Disk Pages with a Linked List", journal = j-TODS, volume = "8", number = "4", pages = "503--514", month = dec, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-4/p503-hecht/p503-hecht.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-4/p503-hecht/", abstract = "We describe and prove correct a programming technique using a linked list of pages for managing the free disk pages of a file system where shadowing is the recovery technique. Our technique requires a window of only two pages of main memory for accessing and maintaining the free list, and avoids wholesale copying of free-list pages during a {\em checkpoint\/} or {\em recover\/} operation.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "checkpoint; computer operating systems; computer programming; database systems; dynamic storage allocation; file system; recovery; shadowing; storage management", subject = "Software --- Operating Systems --- Storage Management (D.4.2): {\bf Allocation/deallocation strategies}; Software --- Operating Systems --- Reliability (D.4.5): {\bf Checkpoint/restart}", } @Article{Malhotra:1983:EIA, author = "A. Malhotra and H. M. Markowitz and D. P. 
Pazel", title = "{EAS-E}: An Integrated Approach to Application Development", journal = j-TODS, volume = "8", number = "4", pages = "515--542", month = dec, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-4/p515-malhotra/p515-malhotra.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-4/p515-malhotra/", abstract = "{\em EAS-E\/} (pronounced EASY) is an experimental programming language integrated with a database management system now running on VM/370 at the IBM Thomas J. Watson Research Center. The EAS-E programming language is built around the entity, attribute, and set ({\em EAS\/}) view of application development. It provides a means for translating operations on EAS structures directly into executable code. EAS-E commands have an English-like syntax, and thus EAS-E programs are easy to read and understand. EAS-E programs are also more compact than equivalent programs in other database languages.\par The EAS-E database management system allows many users simultaneous access to the database. It supports locking and deadlock detection and is capable of efficiently supporting network databases of various sizes including very large databases, consisting of several millions of entities stored on multiple DASD extents. Also available is a nonprocedural facility that allows a user to browse and update the database without writing programs.", acknowledgement = ack-nhfb, annote = "ER model based tool", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming languages; database systems, TODS E/R model; entity relationship model", subject = "Information Systems --- Database Management --- Languages (H.2.3): {\bf Data description languages (DDL)}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Data manipulation languages (DML)}; Information Systems --- Database Management --- Systems (H.2.4); Software --- Software Engineering --- Programming Environments (D.2.6)", } @Article{Moran:1983:CDO, author = "Shlomo Moran", title = "On the Complexity of Designing Optimal Partial-Match Retrieval Systems", journal = j-TODS, volume = "8", number = "4", pages = "543--551", month = dec, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P20 (68P10)", MRnumber = "86j:68024", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Graphics/siggraph/83.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-4/p543-moran/p543-moran.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-4/p543-moran/", abstract = "We consider the problem of designing an information retrieval system on which partial match queries have to be answered. Each record in the system consists of a list of {\em attributes}, and a partial match query specifies the values of some of the attributes. The records are stored in {\em buckets\/} in a secondary memory, and in order to answer a partial match query all the buckets that may contain a record satisfying the specifications of that query must be retrieved. 
The bucket in which a given record is stored is found by a multiple key hashing function, which maps each attribute to a string of a fixed number of bits. The address of that bucket is then represented by the string obtained by concatenating the strings on which the various attributes were mapped. A partial match query may specify only part of the bits in the string representing the address, and the larger the number of bits specified, the smaller the number of buckets that have to be retrieved in order to answer the query. \par The optimization problem considered in this paper is that of deciding to how many bits each attribute should be mapped by the hashing function above, so that the expected number of buckets retrieved per query is minimized. Efficient solutions for special cases of this problem have been obtained in [1], [12], and [14]. It is shown that in general the problem is NP-hard, and that if $P \neq NP$, it is also not fully approximable. Two heuristic algorithms for the problem are also given and compared.", acknowledgement = ack-nhfb, annote = "Optimal variable bit lengths of hashstrings, it is NP-hard.", classification = "723; 901; 922", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "approximation algorithms; computer programming --- algorithms; file organization; hashing; information science; NP-hard problems; optimization, TODS hashing, searching; partial match retrieval; searching", oldlabel = "geom-947", review = "ACM CR 8411-0954", subject = "Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2); Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3)", } @Article{Ramamohanarao:1983:PMR, author = "K. Ramamohanarao and John W. Lloyd and James A. 
Thom", title = "Partial-Match Retrieval using Hashing and Descriptors", journal = j-TODS, volume = "8", number = "4", pages = "552--576", month = dec, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P10", MRnumber = "794 538", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Graphics/siggraph/83.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-4/p552-ramamohanarao/p552-ramamohanarao.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-4/p552-ramamohanarao/", abstract = "This paper studies a partial-match retrieval scheme based on hash functions and descriptors. The emphasis is placed on showing how the use of a descriptor file can improve the performance of the scheme. Records in the file are given addresses according to hash functions for each field in the record. Furthermore, each page of the file has associated with it a descriptor, which is a fixed-length bit string, determined by the records actually present in the page. Before a page is accessed to see if it contains records in the answer to a query, the descriptor for the page is checked. This check may show that no relevant records are on the page and, hence, that the page does not have to be accessed. The method is shown to have a very substantial performance advantage over pure hashing schemes, when some fields in the records have large key spaces. A mathematical model of the scheme, plus an algorithm for optimizing performance, is given.", acknowledgement = ack-nhfb, classification = "723; 901; 921", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- Algorithms; DATA PROCESSING --- File Organization; descriptors; dynamic file; hashing; information science; MATHEMATICAL MODELS; OPTIMIZATION; optimization; partial-match retrieval", oldlabel = "geom-948", subject = "Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}", } @Article{Ceri:1983:CQE, author = "S. Ceri and G. Pelagatti", title = "Correctness of Query Execution Strategies in Distributed Databases", journal = j-TODS, volume = "8", number = "4", pages = "577--607", month = dec, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-4/p577-ceri/p577-ceri.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-4/p577-ceri/", abstract = "A major requirement of a Distributed DataBase Management System (DDBMS) is to enable users to write queries as though the database were not distributed (distribution transparency). The DDBMS transforms the user's queries into execution strategies, that is, sequences of operations on the various nodes of the network and of transmissions between them. An execution strategy on a distributed database is correct if it returns the same result as if the query were applied to a nondistributed database.\par This paper analyzes the correctness problem for query execution strategies. A formal model, called Multirelational Algebra, is used as a unifying framework for this purpose. 
The problem of proving the correctness of execution strategies is reduced to the problem of proving the equivalence of two expressions of Multirelational Algebra. A set of theorems on equivalence is given in order to facilitate this task. \par The proposed approach can be used also for the generation of correct execution strategies, because it defines the rules which allow the transformation of a correct strategy into an equivalent one. This paper does not deal with the problem of evaluating equivalent strategies, and therefore is not in itself a proposal for a query optimizer for distributed databases. However, it constitutes a theoretical foundation for the design of such optimizers.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "correctness of database access; database systems; distributed database access; read-only transactions; relational algebra", subject = "Computer Systems Organization --- Computer-Communication Networks --- Distributed Systems (C.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}; Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Biskup:1983:FCR, author = "Joachim Biskup", title = "A Foundation of {Codd}'s Relational Maybe Operators", journal = j-TODS, volume = "8", number = "4", pages = "608--636", month = dec, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15 (03B70)", MRnumber = "86j:68019", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Ai/nonmono.bib; Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = 
"http://www.acm.org/pubs/articles/journals/tods/1983-8-4/p608-biskup/p608-biskup.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-4/p608-biskup/", abstract = "Database relations which possibly contain maybe-tuples and null values of type ``value at present unknown'' are studied. Maybe-tuples and null values are formally interpreted by our notion of {\em representation}, which uses classical notions of predicate logic, elaborates Codd's proposal of maybe-tuples, and adopts Reiter's concept of a closed world. Precise notions of {\em information content\/} and {\em redundancy}, associated with our notion of representation, are investigated. {\em Extensions of the relational algebra\/} to relations with maybe-tuples and null values are proposed. Our extensions are essentially Codd's, with some modifications. It is proved that these extensions have natural properties which are formally stated as being {\em adequate\/} and {\em restricted}.\par By the treatment of difference and division, our formal framework can be used even for operations that require ``negative information.'' Finally, extensions of {\em update operations\/} are discussed.", acknowledgement = ack-nhfb, annote = "Join, Project, Select, Union, Difference is defined for nulls which are not labeled, results include Maybe. Division does not work. Update is discussed.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "closed world assumption; database systems; information content; maybe-tuple; negative information; null value; open world assumption; redundancy; relational algebra; relational database; representation", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Computing Methodologies --- Artificial Intelligence --- Knowledge Representation Formalisms and Methods (I.2.4): {\bf Predicate logic}", } @Article{Ullman:1983:KCA, author = "Jeffrey D. Ullman", title = "On {Kent}'s {``Consequences of assuming a universal relation''} ({Technical} correspondence)", journal = j-TODS, volume = "8", number = "4", pages = "637--643", month = dec, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/database.bib; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See \cite{Kent:1981:CAU}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-4/p637-ullman/p637-ullman.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-4/p637-ullman/", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "TODS technical correspondence", subject = "Information Systems --- Database Management --- General (H.2.0)", } @Article{Kent:1983:URR, author = "William Kent", title = "The universal relation revisited (technical correspondence)", journal = j-TODS, volume = "8", number = "4", pages = "644--648", month = dec, year = "1983", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/database.bib; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1983-8-4/p644-kent/p644-kent.pdf; http://www.acm.org/pubs/citations/journals/tods/1983-8-4/p644-kent/", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "TODS technical correspondence", subject = "Information Systems --- Database Management --- General (H.2.0)", } @Article{Kaplan:1984:DPN, author = "S. 
Jerrold Kaplan", title = "Designing a Portable Natural Language Database Query System", journal = j-TODS, volume = "9", number = "1", pages = "1--19", month = mar, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Ai/ai.misc.bib; Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-1/p1-kaplan/p1-kaplan.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-1/p1-kaplan/", abstract = "One barrier to the acceptance of natural language database query systems is the substantial installation effort required for each new database. Much of this effort involves the encoding of semantic knowledge for the domain of discourse, necessary to correctly interpret and respond to natural language questions. For such systems to be practical, techniques must be developed to increase their portability to new domains. \par This paper discusses several issues involving the portability of natural language interfaces to database systems, and presents the approach taken in {\em CO-OP\/} -- a natural language database query system that provides cooperative responses to English questions and operates with a typical CODASYL database system. {\em CO-OP\/} derives its domain-specific knowledge from a {\em lexicon\/} (the list of words known to the system) and the information already present in the structure and content of the underlying database. Experience with the implementation suggests that strategies that are not directly derivative of cognitive or linguistic models may nonetheless play an important role in the development of practical natural language systems.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems", } @Article{Reiss:1984:PDS, author = "Steven P. Reiss", title = "Practical Data-Swapping: The First Steps", journal = j-TODS, volume = "9", number = "1", pages = "20--37", month = mar, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-1/p20-reiss/p20-reiss.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-1/p20-reiss/", abstract = "The problem of statistical database confidentiality in releasing microdata is addressed through the use of approximate data-swapping. Here, a portion of the microdata is replaced with a database that has been selected with approximately the same statistics. The result guarantees the confidentiality of the original data, while providing microdata with accurate statistics. Methods for achieving such transformations are considered and analyzed through simulation.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Security", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems, TODS statistical databases", subject = "Data --- Coding and Information Theory (E.4): {\bf Nonsecret encoding schemes**}; Information Systems --- Models and Principles --- Systems and Information Theory (H.1.1): {\bf Value of information}; Information Systems --- Information Storage and Retrieval --- Online Information Services (H.3.5): {\bf Data sharing}", } @Article{Nievergelt:1984:GFA, author = "J. 
Nievergelt and Hans Hinterberger and Kenneth C. Sevcik", title = "The Grid File: An Adaptable, Symmetric Multikey File Structure", journal = j-TODS, volume = "9", number = "1", pages = "38--71", month = mar, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-1/p38-nievergelt/p38-nievergelt.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-1/p38-nievergelt/", abstract = "Traditional file structures that provide multikey access to records, for example, inverted files, are extensions of file structures originally designed for single-key access. They manifest various deficiencies in particular for multikey access to highly dynamic files. We study the dynamic aspects of file structures that treat all keys symmetrically, that is, file structures which avoid the distinction between primary and secondary keys. We start from a bitmap approach and treat the problem of file design as one of data compression of a large sparse matrix. This leads to the notions of a {\em grid partition\/} of the search space and of a {\em grid directory}, which are the keys to a dynamic file structure called the {\em grid file}. This file system adapts gracefully to its contents under insertions and deletions, and thus achieves an upper bound of two disk accesses for single record retrieval; it also handles range queries and partially specified queries efficiently. 
We discuss in detail the design decisions that led to the grid file, present simulation results of its behavior, and compare it to other multikey access file structures.", acknowledgement = ack-nhfb, annote = "Grid files use a vector of hash-keys, partition the result into clusters, and store the clusters into blocks. Two accesses are used for retrieval. Update may be more costly. Access structures fit in core?", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing, TODS multidimensional hashing, multidimensional hashing", review = "ACM CR 8411-0931", } @Article{Buchanan:1984:DMS, author = "Jack R. Buchanan and Richard D. Fennell and Hanan Samet", title = "A Database Management System for the {Federal Courts}", journal = j-TODS, volume = "9", number = "1", pages = "72--88", month = mar, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-1/p72-buchanan/p72-buchanan.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-1/p72-buchanan/", abstract = "A judicial systems laboratory has been established and several large-scale information management systems projects have been undertaken within the Federal Judicial Center in Washington, D.C. The newness of the court application area, together with the experimental nature of the initial prototypes, required that the system building tools be as flexible and efficient as possible for effective software design and development. 
The size of the databases, the expected transaction volumes, and the long-term value of the court records required a data manipulation system capable of providing high performance and integrity. The resulting design criteria, the programming capabilities developed, and their use in system construction are described herein. This database programming facility has been especially designed as a technical management tool for the database administrator, while providing the applications programmer with a flexible database software interface for high productivity. \par Specifically, a network-type database management system using SAIL as the data manipulation host language is described. Generic data manipulation verb formats using SAIL's macro facilities and dynamic data structuring facilities allowing in-core database representations have been developed to achieve a level of flexibility not usually attained in conventional database systems.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems, TODS SAIL, network model", } @Article{Papadimitriou:1984:CCM, author = "Christos H. Papadimitriou and Paris C. 
Kanellakis", title = "On Concurrency Control by Multiple Versions", journal = j-TODS, volume = "9", number = "1", pages = "89--99", month = mar, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-1/p89-papadimitriou/p89-papadimitriou.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-1/p89-papadimitriou/", abstract = "We examine the problem of concurrency control when the database management system supports multiple versions of the data. We characterize the limit of the parallelism achievable by the multiversion approach and demonstrate the resulting space-parallelism trade-off.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems", } @Article{Shultz:1984:RTA, author = "Roger K. Shultz and Roy J. 
Zingg", title = "Response Time Analysis of Multiprocessor Computers for Database Support", journal = j-TODS, volume = "9", number = "1", pages = "100--132", month = mar, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-1/p100-shultz/p100-shultz.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-1/p100-shultz/", abstract = "Comparison of three multiprocessor computer architectures for database support is made possible through evaluation of response time expressions. These expressions are derived by parameterizing algorithms performed by each machine to execute a relational algebra query. Parameters represent properties of the database and components of the machines. Studies of particular parameter values exhibit response times for conventional machine technology, for low selectivity, high duplicate occurrence, and parallel disk access, increasing the number of processors, and improving communication and processing technology.", acknowledgement = ack-nhfb, annote = "analyzes DIRECT, HYPERTREE, and REPT, their own proposal.", classification = "722; 723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "COMPUTER SYSTEMS, DIGITAL --- multiprocessing, TODS relational model, relational queries, direct, hypertree, rept; database systems", } @Article{Valduriez:1984:JSA, author = "Patrick Valduriez and Georges Gardarin", title = "Join and Semijoin Algorithms for a Multiprocessor Database Machine", journal = j-TODS, volume = "9", number = "1", pages = "133--161", month = mar, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-1/p133-valduriez/p133-valduriez.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-1/p133-valduriez/", abstract = "This paper presents and analyzes algorithms for computing joins and semijoins of relations in a multiprocessor database machine. First, a model of the multiprocessor architecture is described, incorporating parameters defining I/O, CPU, and message transmission times that permit calculation of the execution times of these algorithms. Then, three join algorithms are presented and compared. It is shown that, for a given configuration, each algorithm has an application domain defined by the characteristics of the operand and result relations. Since a semijoin operator is useful for decreasing I/O and transmission times in a multiprocessor system, we present and compare two equi-semijoin algorithms and one non-equi-semijoin algorithm. The execution times of these algorithms are generally linearly proportional to the size of the operand and result relations, and inversely proportional to the number of processors. 
We then compare a method which consists of joining two relations to a method whereby one joins their semijoins. Finally, it is shown that the latter method, using semijoins, is generally better. The various algorithms presented are implemented in the SABRE database system; an evaluation model selects the best algorithm for performing a join according to the results presented here. A first version of the SABRE system is currently operational at INRIA.", acknowledgement = ack-nhfb, classification = "722; 723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "COMPUTER SYSTEMS, DIGITAL --- Multiprocessing; database systems", } @Article{Christodoulakis:1984:ICA, author = "S. Christodoulakis", title = "Implications of Certain Assumptions in Database Performance Evaluation", journal = j-TODS, volume = "9", number = "2", pages = "163--186", month = jun, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "86k:68011", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-2/p163-christodoulakis/p163-christodoulakis.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-2/p163-christodoulakis/", abstract = "The assumptions of uniformity and independence of attribute values in a file, uniformity of queries, constant number of records per block, and random placement of qualifying records among the blocks of a file are frequently used in database performance evaluation studies. In this paper we show that these assumptions often result in predicting only an upper bound of the expected system cost. 
We then discuss the implications of nonrandom placement, nonuniformity, and dependencies of attribute values on database design and database performance evaluation.", acknowledgement = ack-nhfb, annote = "After a somewhat cursory reading of the paper --- A few comments: A. Uniform distributions, particularly for parallel machines, may imply uniform distribution of work over the machines --- thus giving an upper bound for speedup --- thus a uniform distribution is an optimistic assumption. B. For uniprocessor systems -- the assumption of a uniform dist. is optimistic when: 1. hashing --- fewer collisions, shorter lists at collisions should be expected from a uniform distribution. 2. trees --- more balancing costs may be incurred for non-uniform distributions. 3. searching --- for example, a binary search on a non-uniform distribution could cost significantly more. 4. sorting --- I suspect that uniform distributions are optimal for some sorting methods, although I haven't looked at this in any detail.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems, selectivity cost estimation approx TODS", } @Article{Effelsberg:1984:LIP, author = "Wolfgang Effelsberg and Mary E. S. 
Loomis", title = "Logical, Internal, and Physical Reference Behavior in {CODASYL} Database Systems", journal = j-TODS, volume = "9", number = "2", pages = "187--213", month = jun, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-2/p187-effelsberg/p187-effelsberg.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-2/p187-effelsberg/", abstract = "This work investigates one aspect of the performance of CODASYL database systems: the data reference behavior. We introduce a model of database traversals at three levels: the logical, internal, and physical levels. The mapping between the logical and internal levels is defined by the internal schema, whereas the mapping between the internal and the physical levels depends on cluster properties of the database. Our model explains the physical reference behavior for a given sequence of DML statements at the logical level. \par Software has been implemented to monitor references in two selected CODASYL DBMS applications. In a series of experiments the physical reference behavior was observed for varying internal schemas and cluster properties of the database. The measurements were limited to retrieval transactions, so that a variety of queries could be analyzed for the same well-known state of the database. Also, all databases were relatively small in order to allow fast reloading with varying internal schema parameters. In all cases, the database transactions showed less locality of reference than do programs under virtual memory operating systems; some databases showed no locality at all. No evidence of physical sequentiality was found. 
This suggests that standard page replacement strategies are not optimal for CODASYL database buffer management; instead, replacement decisions in a database buffer should be based on specific knowledge available from higher system layers.", acknowledgement = ack-nhfb, acmcrnumber = "8506 0534", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems, TODS buffer management", subject = "Information Systems --- Database Management --- Systems (H.2.4); Computer Systems Organization --- Performance of Systems (C.4): {\bf Measurement techniques}; Software --- Operating Systems --- Storage Management (D.4.2): {\bf Storage hierarchies}; Software --- Operating Systems --- Performance (D.4.8): {\bf Measurements}; Software --- Operating Systems --- Performance (D.4.8): {\bf Modeling and prediction}; Information Systems --- Database Management --- Physical Design (H.2.2); Information Systems --- Database Management --- Logical Design (H.2.1)", } @Article{Kim:1984:PPR, author = "Won Kim and Daniel Gajski and David J. Kuck", title = "A Parallel Pipelined Relational Query Processor", journal = j-TODS, volume = "9", number = "2", pages = "214--242", month = jun, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-2/p214-kim/p214-kim.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-2/p214-kim/", abstract = "This paper presents the design of a relational query processor. 
The query processor consists of only four processing PIPEs and a number of random-access memory modules. Each PIPE processes tuples of relations in a bit-serial, tuple-parallel manner for each of the primitive database operations which comprise a complex relational query. The design of the query processor meets three major objectives: the query processor must be manufacturable using existing and near-term LSI (VLSI) technology; it must support in a uniform manner both the numeric and nonnumeric processing requirements a high-level user interface like SQL presents; and it must support the query-processing strategy derived in the query optimizer to satisfy certain system-wide performance optimality criteria.", acknowledgement = ack-nhfb, classification = "722; 723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer architecture; database systems; pipeline processing; relational query processor, hardware support database machine TODS", subject = "Computer Systems Organization --- Processor Architectures --- Other Architecture Styles (C.1.3): {\bf High-level language architectures**}; Information Systems --- Database Management --- Database Machines (H.2.6)", } @Article{Al-Suwaiyel:1984:ATC, author = "M. Al-Suwaiyel and E. 
Horowitz", title = "Algorithms for Trie Compaction", journal = j-TODS, volume = "9", number = "2", pages = "243--263", month = jun, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P05", MRnumber = "794 541", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1984-9-2/p243-ai-suwaiyel/", abstract = "The trie data structure has many properties which make it especially attractive for representing large files of data. These properties include fast retrieval time, quick unsuccessful search determination, and finding the longest match to a given identifier. The main drawback is the space requirement. In this paper the concept of trie compaction is formalized. An exact algorithm for optimal trie compaction and three algorithms for approximate trie compaction are given, and an analysis of the three algorithms is done. The analyses indicate that for actual tries, reductions of around 70 percent in the space required by the uncompacted trie can be expected. The quality of the compaction is shown to be insensitive to the number of nodes, while a more relevant parameter is the alphabet size of the key.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing", subject = "Data --- Data Structures (E.1); Data --- Data Storage Representations (E.2); Data --- Coding and Information Theory (E.4): {\bf Data compaction and compression}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2); Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2)", } @Article{Mendelzon:1984:DST, author = "Alberto O. Mendelzon", title = "Database States and Their Tableaux", journal = j-TODS, volume = "9", number = "2", pages = "264--282", month = jun, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "794 542", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-2/p264-mendelzon/p264-mendelzon.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-2/p264-mendelzon/", abstract = "Recent work considers a database state to satisfy a set of dependencies if there exists a satisfying universal relation whose projections contain each of the relations in the state. Such relations are called {\em weak instances\/} for the state. We propose the set of all weak instances for a state as an embodiment of the information represented by the state. We characterize states that have the same set of weak instances by the equivalence of their associated tableaux. We apply this notion to the comparison of database schemes and characterize all pairs of schemes such that for every legal state of one of them there exists an equivalent legal state of the other one. 
We use this approach to provide a new characterization of Boyce-Codd Normal Form relation schemes.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "TODS weak instance assumption, database systems", } @Article{Maier:1984:FUR, author = "David Maier and Jeffrey D. Ullman and Moshe Y. Vardi", title = "On the Foundations of the Universal Relation Model", journal = j-TODS, volume = "9", number = "2", pages = "283--308", month = jun, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "86m:68031", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-2/p283-maier/p283-maier.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-2/p283-maier/", abstract = "Two fundamentally different approaches to the universal relation model have been taken. According to the first approach, the user's view of the database is a universal relation or many universal relations, about which the user poses queries. The second approach sees the model as having query-processing capabilities that relieve the user of the need to specify the logical access path. Thus, while the first approach gives a denotational semantics to query answering, the second approach gives it an operational semantics. The authors investigate the relationship between these two approaches.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Languages; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}; Computing Methodologies --- Artificial Intelligence --- Deduction and Theorem Proving (I.2.3); Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Normal forms}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Katsuno:1984:ECF, author = "Hirofumi Katsuno", title = "An Extension of Conflict-free Multi-valued Dependency Sets", journal = j-TODS, volume = "9", number = "2", pages = "309--326", month = jun, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "86m:68029", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-2/p309-katsuno/p309-katsuno.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-2/p309-katsuno/", abstract = "Several researchers (Beeri, Bernstein, Chiu, Fagin, Goodman, Maier, Mendelzon, Ullman, and Yannakakis) have introduced a special class of database schemes, called {\em acyclic\/} or {\em tree\/} schemes. Beeri et al. have shown that an acyclic join dependency, naturally defined by an acyclic database scheme, has several desirable properties, and that an acyclic join dependency is equivalent to a conflict-free set of multivalued dependencies. 
However, since their results are confined to multivalued and join dependencies, it is not clear whether we can handle functional dependencies independently of other dependencies. \par In the present paper we define an extension of a conflict-free set, called an {\em extended conflict-free set}, including multivalued dependencies and functional dependencies, and show the following two properties of an extended conflict-free set:\par There are three equivalent definitions of an extended conflict-free set. One of them is defined as a set including an acyclic join dependency and a set of functional dependencies such that the left and right sides of each functional dependency are included in one of the attribute sets that construct the acyclic join dependency.\par For a relation scheme with an extended conflict-free set, there is a decomposition into third normal form with a lossless join and preservation of dependencies.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Normal forms}", } @Article{Korth:1984:SUD, author = "Henry F. Korth and Gabriel M. Kuper and Joan Feigenbaum and Allen {Van Gelder} and Jeffrey D. 
Ullman", title = "{System/U}: a Database System Based on the Universal Relation Assumption", journal = j-TODS, volume = "9", number = "3", pages = "331--347", month = sep, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-3/p331-korth/p331-korth.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-3/p331-korth/", abstract = "System/U is a universal relation database system under development at Stanford University which uses the language C on UNIX. The system is intended to test the use of the universal view, in which the entire database is seen as one relation. This paper describes the theory behind System/U, in particular the theory of maximal objects and the connection between a set of attributes. We also describe the implementation of the DDL (Data Description Language) and the DML (Data Manipulation Language), and discuss in detail how the DDL finds maximal objects and how the DML determines the connection between the attributes that appear in a query.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming languages; database systems; RELATIONAL DATABASE; SYSTEM/U; UNIVERSAL RELATION", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Data description languages (DDL)}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Data manipulation languages (DML)}; Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Access methods}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Path and circuit problems}", } @Article{Wald:1984:RQI, author = "Joseph A. Wald and Paul G. Sorenson", title = "Resolving the Query Inference Problem Using {Steiner} Trees", journal = j-TODS, volume = "9", number = "3", pages = "348--368", month = sep, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Graphics/siggraph/86.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-3/p348-wald/p348-wald.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-3/p348-wald/", abstract = "The query inference problem is to translate a sentence of a query language into an unambiguous representation of a query. A query is represented as an expression over a set of query trees. A metric is introduced for measuring the complexity of a query and also a proposal that a sentence be translated into the least complex query which `satisfies' the sentence. 
This method of query inference can be used to resolve ambiguous sentences and leads to easier formulation of sentences.", acknowledgement = ack-nhfb, annote = "MDCST resolves queries over attributes using a schema tree.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems; query inference problem; Steiner trees, TODS E/R model", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Query formulation}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Graph algorithms}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Trees}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}", } @Article{Ramamohanarao:1984:RLH, author = "K. Ramamohanarao and R. 
Sacks-Davis", title = "Recursive Linear Hashing", journal = j-TODS, volume = "9", number = "3", pages = "369--391", month = sep, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P10", MRnumber = "794 545", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-3/p369-ramamohanarao/p369-ramamohanarao.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-3/p369-ramamohanarao/", abstract = "A modification of linear hashing is proposed for which the conventional use of overflow records is avoided. Furthermore, an implementation of linear hashing is presented for which the amount of physical storage claimed is only fractionally more than the minimum required. This implementation uses a fixed amount of in-core space. Simulation results are given which indicate that even for storage utilizations approaching 95 percent, the average successful search cost for this method is close to one disk access.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "COMPUTER SIMULATION; data processing; DYNAMIC FILES; LINEAR HASHING", subject = "Data --- Data Storage Representations (E.2): {\bf Hash-table representations}; Software --- Operating Systems --- File Systems Management (D.4.3): {\bf File organization}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Computing Methodologies --- Simulation and Modeling --- Applications (I.6.3); Software --- Operating Systems --- Storage Management (D.4.2): {\bf Secondary storage}", } @Article{Cooper:1984:ATU, author = "Robert B. Cooper and Martin K. Solomon", title = "The Average Time Until Bucket Overflow", journal = j-TODS, volume = "9", number = "3", pages = "392--408", month = sep, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-3/p392-cooper/p392-cooper.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-3/p392-cooper/", abstract = "It is common for file structures to be divided into equal-length partitions, called buckets, into which records arrive for insertion and from which records are physically deleted. We give a simple algorithm which permits calculation of the average time until overflow for a bucket of capacity $n$ records, assuming that record insertions and deletions can be modeled as a stochastic process in the usual manner of queueing theory. 
We present some numerical examples, from which we make some general observations about the relationships among insertion and deletion rates, bucket capacity, initial fill, and average time until overflow. In particular, we observe that it makes sense to define the {\em stable point\/} as the product of the arrival rate and the average residence time of the records; then a bucket tends to fill up to its stable point quickly, in an amount of time almost independent of the stable point, but the average time until overflow increases rapidly with the difference between the bucket capacity and the stable point.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "BUCKET OVERFLOW; data processing", } @Article{March:1984:SER, author = "Salvatore T. March and Gary D. Scudder", title = "On the Selection of Efficient Record Segmentations and Backup Strategies for Large Shared Databases", journal = j-TODS, volume = "9", number = "3", pages = "409--438", month = sep, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-3/p409-march/p409-march.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-3/p409-march/", abstract = "In recent years the information processing requirements of business organizations have expanded tremendously. With this expansion, the design of databases to efficiently manage and protect business information has become critical. 
We analyze the impacts of {\em record segmentation\/} (the assignment of data items to segments defining subfiles), an efficiency-oriented design technique, and of {\em backup and recovery strategies}, a data protection technique, on the overall process of database design. A combined record segmentation/backup and recovery procedure is presented and an application of the procedure is discussed. Results in which problem characteristics are varied along three dimensions: update frequencies, available types of access paths, and the predominant type of data retrieval that must be supported by the database, are presented.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "backup strategies; database systems; large shared databases; record segmentations", subject = "Data --- Files (E.5): {\bf Organization/structure}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}; Software --- Operating Systems --- Reliability (D.4.5): {\bf Backup procedures}; Software --- Operating Systems --- File Systems Management (D.4.3): {\bf File organization}; Data --- Files (E.5): {\bf Backup/recovery}; Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Logging and recovery}", } @Article{Manber:1984:CCD, author = "Udi Manber and Richard E. 
Ladner", title = "Concurrency Control in a Dynamic Search Structure", journal = j-TODS, volume = "9", number = "3", pages = "439--455", month = sep, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68N25 (68P15)", MRnumber = "794 546", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: ACM Proc. on Database Systems, Boston, Apr. 1982, pp. 268--282.", URL = "http://www.acm.org/pubs/citations/journals/tods/1984-9-3/p439-manbar/", abstract = "A design of a data structure and efficient algorithms for concurrent manipulations of a dynamic search structure by independent user processes is presented in this paper. The algorithms include updating data, inserting new elements, and deleting elements. The algorithms support a high level of concurrency. Each of the operations listed above requires only constant amount of locking. In order to make the system even more efficient for the user processes, maintenance processes are introduced. The maintenance processes operate independently in the background to reorganize the data structure and ``clean up'' after the (more urgent) user processes. A proof of correctness of the algorithms is given and some experimental results and extensions are examined.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- Algorithms; concurrency control; data processing; dynamic search structure", } @Article{Davidson:1984:OCP, author = "Susan B. 
Davidson", title = "Optimism and Consistency in Partitioned Distributed Database Systems", journal = j-TODS, volume = "9", number = "3", pages = "456--481", month = sep, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "794 547", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-3/p456-davidson/p456-davidson.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-3/p456-davidson/", abstract = "A protocol for transaction processing during partition failures is presented which guarantees mutual consistency between copies of data-items after repair is completed. The protocol is ``optimistic'' in that transactions are processed without restrictions during failure; conflicts are then detected at repair time using a {\em precedence graph}, and are resolved by backing out transactions according to some {\em backout strategy}. The resulting database state then corresponds to a serial execution of some subset of transactions run during the failure. Results from simulation and probabilistic modeling show that the optimistic protocol is a reasonable alternative in many cases. Conditions under which the protocol performs well are noted, and suggestions are made as to how performance can be improved. In particular, a backout strategy is presented which takes into account individual transaction costs and attempts to minimize total backout cost. Although the problem of choosing transactions to minimize total backout cost is, in general, NP-complete, the backout strategy is efficient and produces very good results.", acknowledgement = ack-nhfb, annote = "Counter example to uniform is last hypothesis of Christodoulakis.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems; DISTRIBUTED database systems", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}; Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Recovery and restart}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2)", } @Article{Ibaraki:1984:ONO, author = "Toshihide Ibaraki and Tiko Kameda", title = "On the Optimal Nesting Order for Computing $ {N} $-Relational Joins", journal = j-TODS, volume = "9", number = "3", pages = "482--502", month = sep, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "794 548", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-3/p482-ibaraki/p482-ibaraki.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-3/p482-ibaraki/", abstract = "Using the nested loops method, this paper addresses the problem of minimizing the number of page fetches necessary to evaluate a given query to a relational database. We first propose a data structure whereby the number of page fetches required for query evaluation is substantially reduced and then derive a formula for the expected number of page fetches. An optimal solution to our problem is the nesting order of relations in the evaluation program, which minimizes the number of page fetches. 
Since the minimization of the formula is NP-hard, as shown in the Appendix, we propose a heuristic algorithm which produces a good suboptimal solution in polynomial time. For the special case where the input query is a ``tree query,'' we present an efficient algorithm for finding an optimal nesting order.", acknowledgement = ack-nhfb, acmcrnumber = "8506 0535", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "N-relational joins; optimal nesting order; TODS query optimization processing relational model, database systems", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}; Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2); Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}", } @Article{Elhardt:1984:DCH, author = "Klaus Elhardt and Rudolf Bayer", title = "A Database Cache for High Performance and Fast Restart in Database Systems", journal = j-TODS, volume = "9", number = "4", pages = "503--525", month = dec, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-4/p503-elhardt/p503-elhardt.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-4/p503-elhardt/", abstract = "Performance in database systems is strongly influenced by buffer management and transaction recovery methods. 
This paper presents the principles of the database cache, which replaces the traditional buffer. In comparison to buffer management, cache management is more carefully coordinated with transaction management, and integrates transaction recovery. High throughput of \par small- and medium-sized transactions is achieved by fast commit processing and low database traffic. Very fast handling of transaction failures and short restart time after system failure are guaranteed in such an environment. Very long retrieval and update transactions are also supported.", acknowledgement = ack-nhfb, affiliation = "Technische Univ, Muenchen, Inst fuer Informatik, Munich, West Ger", affiliationaddress = "Technische Univ, Muenchen, Inst fuer Informatik, Munich, West Ger", annote = "The Elhardt-Bayer cache does indeed resemble Alpine in many important respects. The primary difference is that it requires all of a transaction's updates to be written to the log in contiguous log pages, which allows some compact encodings to be used in representing the log, but also means that more work must be done synchronously at commit time. Also, their scheme is not designed to support two-phase commit, and extending it to handle two-phase commit is sure to mess up the pretty log encoding, I think. ---Mark Brown.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "buffer management; crash recovery; data processing; database cache; database systems; fast restart; media failure", subject = "Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Recovery and restart}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}; Software --- Operating Systems --- Storage Management (D.4.2): {\bf Main memory}", } @Article{Reuter:1984:PAR, author = "Andreas Reuter", title = "Performance Analysis of Recovery Techniques", journal = j-TODS, volume = "9", number = "4", pages = "526--559", month = dec, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-4/p526-reuter/p526-reuter.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-4/p526-reuter/", abstract = "Various logging and recovery techniques for centralized transaction-oriented database systems under performance aspects are described and discussed. The classification of functional principles that has been developed in a companion paper is used as a terminological basis. In the main sections, a set of analytic models is introduced and evaluated in order to compare the performance characteristics of nine different recovery techniques with respect to four key parameters and a set of other parameters with less influence. 
Finally, the results of model evaluation as well as the limitations of the models themselves are discussed.", acknowledgement = ack-nhfb, affiliation = "Univ of Kaiserslautern, Kaiserslautern, West Ger", affiliationaddress = "Univ of Kaiserslautern, Kaiserslautern, West Ger", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- algorithms; data processing; database systems; logging and recovery; recovery and restart; recovery techniques; Reliability; transaction processing", subject = "Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Recovery and restart}; Software --- Operating Systems --- Reliability (D.4.5): {\bf Fault-tolerance}; Software --- Operating Systems --- Performance (D.4.8): {\bf Modeling and prediction}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}; Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Logging and recovery}", } @Article{Effelsberg:1984:PDB, author = "Wolfgang Effelsberg and Theo Haerder", title = "Principles of Database Buffer Management", journal = j-TODS, volume = "9", number = "4", pages = "560--595", month = dec, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-4/p560-effelsberg/p560-effelsberg.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-4/p560-effelsberg/", abstract = "This paper discusses the implementation of a database buffer manager as a component of a DBMS. 
The interface between calling components of higher system layers and the buffer manager is described; the principal differences between virtual memory paging and database buffer management are outlined; the notion of referencing versus addressing of database pages is introduced; and the concept of fixing pages in the buffer to prevent uncontrolled replacement is explained.\par Three basic tasks have to be performed by the buffer manager: buffer search, allocation of frames to concurrent transactions, and page replacement. For each of these tasks, implementation alternatives are discussed and illustrated by examples from a performance evaluation project of a CODASYL DBMS.", acknowledgement = ack-nhfb, affiliation = "IBM, Scientific Cent, Heidelberg, West Ger", affiliationaddress = "IBM, Scientific Cent, Heidelberg, West Ger", annote = "an interface between the buffer manager and the DBMS, choices of page replacement policies; does not cover sequential I/O (read-ahead and write-behind).", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "buffer management; computer programming --- algorithms; data processing; database systems; memory paging; referencing database pages; replacement algorithms", subject = "Information Systems --- Database Management --- Physical Design (H.2.2); Software --- Operating Systems --- Storage Management (D.4.2): {\bf Storage hierarchies}", } @Article{Bernstein:1984:ACC, author = "Philip A. 
Bernstein and Nathan Goodman", title = "An Algorithm for Concurrency Control and Recovery in Replicated Distributed Databases", journal = j-TODS, volume = "9", number = "4", pages = "596--615", month = dec, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "86k:68010", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/fault.tolerant.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-4/p596-bernstein/p596-bernstein.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-4/p596-bernstein/", abstract = "In a one-copy distributed database, each data item is stored at exactly one site. In a replicated database, some data items may be stored at multiple sites. The main motivation is improved reliability: by storing important data at multiple sites, the DBS can operate even though some sites have failed.\par This paper describes an algorithm for handling replicated data, which allows users to operate on data so long as one copy is ``available.'' A copy is ``available'' when (i) its site is up, and (ii) the copy is not out-of-date because of an earlier crash. \par The algorithm handles clean, detectable site failures, but not Byzantine failures or network partitions.", acknowledgement = ack-nhfb, affiliation = "Sequoia Systems Inc, Marlborough, MA, USA", affiliationaddress = "Sequoia Systems Inc, Marlborough, MA, USA", annote = "3-phase commit. The first and third phases are identical to the two phases of 2-phase commit. There is a `Precommit' phase after the first phase where the knowledge of the coordinator is replicated elsewhere, thus protecting against a crash of the coordinator (which could result in locks being tied up for long periods).", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Algorithms; computer programming; concurrency control and recovery; continuous operation; database systems; replicated distributed databases; serializability; transaction processing", subject = "Information Systems --- Database Management --- Physical Design (H.2.2): {\bf Recovery and restart}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}", } @Article{Chen:1984:ANV, author = "Wen-Chin Chen and Jeffrey Scott Vitter", title = "Analysis of New Variants of Coalesced Hashing", journal = j-TODS, volume = "9", number = "4", pages = "616--645", month = dec, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P10", MRnumber = "794 550", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-4/p616-chen/p616-chen.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-4/p616-chen/", abstract = "The coalesced hashing method has been shown to be very fast for dynamic information storage and retrieval. This paper analyzes in a uniform way the performance of coalesced hashing and its variants, thus settling some open questions in the literature.\par In all the variants, the range of the hash function is called the {\em address region}, and extra space reserved for storing colliders is called the {\em cellar}. We refer to the unmodified method, which was analyzed previously, as {\em late-insertion\/} coalesced hashing.
In this paper we analyze late insertion and two new variations called {\em early insertion\/} and {\em varied insertion}. When there is no cellar, the early-insertion method is better than late insertion; however, past experience has indicated that it might be worse when there is a cellar. Our analysis confirms that it is worse. The varied-insertion method was introduced as a means of combining the advantages of late insertion and early insertion. This paper shows that varied insertion requires fewer probes per search, on the average, than do the other variants.\par Each of these three coalesced hashing methods has a parameter that relates the sizes of the address region and the cellar. Techniques in this paper are designed for tuning the parameter in order to achieve optimum search times. We conclude with a list of open problems.", acknowledgement = ack-nhfb, affiliation = "Brown Univ, Dep of Computer Science, Providence, RI, USA", affiliationaddress = "Brown Univ, Dep of Computer Science, Providence, RI, USA", annote = "Chaining and open addressing. Internal memory is assumed!", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "address region; cellar; coalesced hashing; computer programming --- algorithms; data processing; early insertion; information retrieval systems; late insertion", subject = "Data --- Data Storage Representations (E.2): {\bf Hash-table representations}; Software --- Software Engineering --- Metrics (D.2.8): {\bf Performance measures}; Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Sorting and searching}; Mathematics of Computing --- Discrete Mathematics --- Combinatorics (G.2.1): {\bf Generating functions}; Mathematics of Computing --- Discrete Mathematics --- Combinatorics (G.2.1): {\bf Permutations and combinations}; Mathematics of Computing --- Discrete Mathematics --- Combinatorics (G.2.1): {\bf Recurrences and difference equations}; Mathematics of Computing --- Probability and Statistics (G.3): {\bf Random number generation}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Search process}", } @Article{Deogun:1984:OCF, author = "J. S. Deogun and V. V. Raghavan and T. K. W. 
Tsou", title = "Organization of Clustered Files for Consecutive Retrieval", journal = j-TODS, volume = "9", number = "4", pages = "646--671", month = dec, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-4/p646-deogun/p646-deogun.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-4/p646-deogun/", abstract = "This paper studies the problem of storing single-level and multilevel clustered files. Necessary and sufficient conditions for a single-level clustered file to have the consecutive retrieval property (CRP) are developed. A linear time algorithm to test the CRP for a given clustered file and to identify the proper arrangement of objects, if CRP exists, is presented. For the single-level clustered files that do not have CRP, it is shown that the problem of identifying a storage organization with minimum redundancy is NP-complete.\par Consequently, an efficient heuristic algorithm to generate a good storage organization for such files is developed. Furthermore, it is shown that, for certain types of multilevel clustered files, there exists a storage organization such that the objects in each cluster, for all clusters in each level of the clustering, appear in consecutive locations.", acknowledgement = ack-nhfb, affiliation = "Univ of Nebraska, Lincoln, NE, USA", affiliationaddress = "Univ of Nebraska, Lincoln, NE, USA", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "CLUSTERED FILES; computer programming --- Algorithms; CONSECUTIVE RETRIEVAL; data processing --- File Organization; FILE ORGANIZATION; information retrieval systems; NP-COMPLETE", subject = "Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Clustering}; Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2): {\bf File organization}", } @Article{Traub:1984:SSS, author = "J. F. Traub and Y. Yemini and H. Wozniakowski", title = "The Statistical Security of a Statistical Database", journal = j-TODS, volume = "9", number = "4", pages = "672--679", month = dec, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Dec 8 08:54:10 MST 1996", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This study proposes a statistical perturbation scheme to protect a statistical database against compromise. The proposed scheme can handle the security of numerical as well as nonnumerical sensitive fields or a combination of fields. Furthermore, knowledge of some records in a database does not help to compromise unknown records. The authors use Chebyshev's inequality to analyze the trade-offs among the magnitude of the perturbations, the error incurred by statistical queries, and the size of the query set to which they apply. 
They show that if the statistician is given absolute error guarantees, then a compromise is possible, but the cost is made exponential in the size of the database.", acknowledgement = ack-nhfb, affiliation = "Columbia Univ, Dep of Computer Science, New York, NY, USA", affiliationaddress = "Columbia Univ, Dep of Computer Science, New York, NY, USA", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Chebyshev's inequality; complexity of compromise; data processing --- Security of data; database systems; security; statistical database", } @Article{Navathe:1984:VPA, author = "Shamkant Navathe and Stefano Ceri and Gio Wiederhold and Jinglie Dou", title = "Vertical Partitioning Algorithms for Database Design", journal = j-TODS, volume = "9", number = "4", pages = "680--710", month = dec, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", note = "Also published in/as: Stanford Un., TR-CS-82-957, Jan. 1983, revised Aug. 1983.", URL = "http://www.acm.org/pubs/articles/journals/tods/1984-9-4/p680-navathe/p680-navathe.pdf; http://www.acm.org/pubs/citations/journals/tods/1984-9-4/p680-navathe/", abstract = "This paper addresses the vertical partitioning of a set of logical records or a relation into fragments. The rationale behind vertical partitioning is to produce fragments, groups of attribute columns, that ``closely match'' the requirements of transactions. \par Vertical partitioning is applied in three contexts: a database stored on devices of a single type, a database stored in different memory levels, and a distributed database. 
In a two-level memory hierarchy, most transactions should be processed using the fragments in primary memory. In distributed databases, fragment allocation should maximize the amount of local transaction processing.\par Fragments may be nonoverlapping or overlapping. A two-phase approach for the determination of fragments is proposed; in the first phase, the design is driven by empirical objective functions which do not require specific cost information. The second phase performs cost optimization by incorporating the knowledge of a specific application environment. The algorithms presented in this paper have been implemented, and examples of their actual use are shown.", acknowledgement = ack-nhfb, affiliation = "Stanford Univ, Dep of Computer Science, Stanford, CA, USA", affiliationaddress = "Stanford Univ, Dep of Computer Science, Stanford, CA, USA", annote = "based on affinity considerations.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Algorithms; CLUSTERS; computer programming; data processing; database systems --- Design; FRAGMENT ALLOCATION; MEMORY LEVELS; VERTICAL PARTITIONING ALGORITHMS", subject = "Information Systems --- Database Management --- Logical Design (H.2.1); Information Systems --- Database Management --- Physical Design (H.2.2); Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}", } @Article{Maier:1984:DFG, author = "D. 
Maier", title = "Databases in the Fifth Generation Project: Is {Prolog} a Database Language?", journal = j-TODS, volume = "9", number = "2", pages = "??--??", month = jun, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 10 12:48:52 1996", bibsource = "Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: ACM SIGMOD, 1984.", annote = "very readable discussion, includes links to universal relation research.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", xxnote = "This paper does not seem to be published in TODS.", } @Article{Skeen:1984:IAP, author = "D. Skeen and D. D. Wright", title = "Increasing Availability in Partitioned Database Systems", journal = j-TODS, volume = "??", number = "??", pages = "290--299", month = apr, year = "1984", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 10 12:48:54 1996", bibsource = "Distributed/Dist.Sys.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", xxnote = "This paper does not seem to be published in TODS.", } @Article{Franaszek:1985:LCT, author = "Peter Franaszek and John T.
Robinson", title = "Limitations of Concurrency in Transaction Processing", journal = j-TODS, volume = "10", number = "1", pages = "1--28", month = mar, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/real.time.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-1/p1-franaszek/p1-franaszek.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-1/p1-franaszek/; http://www.acm.org/pubs/toc/Abstracts/tods/3160.html", abstract = "Given the pairwise probability of conflict p among transactions in a transaction processing system, together with the total number of concurrent transactions n, the effective level of concurrency E(n,p) is defined as the expected number of the n transactions that can run concurrently and actually do useful work. Using a random graph model of concurrency, we show for three general classes of concurrency control methods, examples of which are (1) standard locking, (2) strict priority scheduling, and (3) optimistic methods, that (1) $E(n, p) \approx n(1 - p/2)^{n-1}$, (2) $E(n, p) \approx (1 - (1 - p)^n)/p$, and (3) $1 + ((1 - p)/p)\ln(p(n - 1) + 1) \leq E(n, p) \leq 1 + (1/p)\ln(p(n - 1) + 1)$. Thus, for fixed $p$, as $n \rightarrow \infty$, (1) $E \rightarrow 0$ for standard locking methods, (2) $E \rightarrow 1/p$ for strict priority scheduling methods, and (3) $E \rightarrow \infty$ for optimistic methods. Also found are bounds on E in the case where conflicts are analyzed so as to maximize E.\par The predictions of the random graph model are confirmed by simulations of an abstract transaction processing system. In practice, though, there is a price to pay for the increased effective level of concurrency of methods (2) and (3): using these methods there is more wasted work (i.e., more steps executed by transactions that are later aborted).
In response to this problem, three new concurrency control methods suggested by the random graph model analysis are developed. Two of these, called (a) running priority and (b) older or running priority, are shown by the simulation results to perform better than the previously known methods (1)--(3) for relatively large n or large p, in terms of achieving a high effective level of concurrency at a comparatively small cost in wasted work.", acknowledgement = ack-nhfb, affiliation = "IBM, Thomas J. Watson Research Cent, Yorktown Heights, NY, USA", affiliationaddress = "IBM, Thomas J. Watson Research Cent, Yorktown Heights, NY, USA", annote = "6 methods, incl. optimistic (best) but not version-ing.", classification = "722; 723; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer systems, digital --- multiprocessing; concurrency control; database systems; mathematical techniques --- graph theory; performance; theory; transaction processing, algorithms; verification", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems. {\bf D.1.3}: Software, PROGRAMMING TECHNIQUES, Concurrent Programming.
{\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Concurrency.", } @Article{Sacca:1985:DPC, author = "Domenico Sacca and Gio Wiederhold", title = "Database Partitioning in a Cluster of Processors", journal = j-TODS, volume = "10", number = "1", pages = "29--56", month = mar, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Parallel/Multi.bib", note = "Also published in \cite[242--247]{Schkolnick:1983:ICV}, and IBM Research Report No. RJ-4076, 1983.", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-1/p29-sacca/p29-sacca.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-1/p29-sacca/; http://www.acm.org/pubs/toc/Abstracts/tods/3161.html", abstract = "In a distributed database system the partitioning and allocation of the database over the processor nodes of the network can be a critical aspect of the database design effort. In this paper we develop and evaluate algorithms that perform this task in a computationally feasible manner. The network we consider is characterized by a relatively high communication bandwidth, considering the processing and input output capacities in its processors. Such a balance is typical if the processors are connected via busses or local networks. The common constraint that transactions have a specific root node no longer exists, so that there are more distribution choices. However, a poor distribution leads to less efficient computation, higher costs, and higher loads in the nodes or in the communication network so that the system may not be able to handle the required set of transactions. \par Our approach is to first split the database into fragments which constitute appropriate units for allocation. 
The fragments to be allocated are selected based on maximal benefit criteria using a greedy heuristic. The assignment to processor nodes uses a first-fit algorithm. The complete algorithm, called GFF, is stated in a procedural form.\par The complexity of the problem and of its candidate solutions are analyzed and several interesting relationships are proven. Alternate benefit metrics are considered, since the execution cost of the allocation procedure varies by orders of magnitude with the alternatives of benefit evaluation. A mixed benefit evaluation strategy is eventually proposed.\par A model for evaluation is presented. Two of the strategies are experimentally evaluated, and the reported results support the discussion. The approach should be suitable for other cases where resources have to be allocated subject to resource constraints.", acknowledgement = ack-nhfb, affiliation = "IBM, Research Lab, San Jose, CA, USA", affiliationaddress = "IBM, Research Lab, San Jose, CA, USA", classification = "722; 723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer systems, digital --- multiprocessing; database partitioning, parallelism declustering partitioning disk striping TODS, algorithms; database systems; design; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. 
{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.", } @Article{Pramanik:1985:UGT, author = "Sakti Pramanik and David Ittner", title = "Use of Graph-Theoretic Models for Optimal Relational Database Accesses to Perform Join", journal = j-TODS, volume = "10", number = "1", pages = "57--74", month = mar, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "794 551", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-1/p57-pramanik/p57-pramanik.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-1/p57-pramanik/; http://www.acm.org/pubs/toc/Abstracts/tods/3325.html", abstract = "A graph model is presented to analyze the performance of a relational join. The amount of page reaccesses, the page access sequence, and the amount of buffer needed are represented in terms of graph parameters. By using the graph model formed from the index on the join attributes, we determine the relationships between these parameters. Two types of buffer allocation strategies are studied, and the upper bound on the buffer size with no page reaccess is given. This bound is shown to be the maximum cut value of a graph. Hence, the problem of computing this upper bound is NP-hard. We also give algorithms to determine a page access sequence requiring a near optimal buffer size with no page reaccess. 
The optimal page access sequence for a fixed buffer size has also been considered.", acknowledgement = ack-nhfb, affiliation = "Michigan State Univ, Computer Science Dep, East Lansing, MI, USA", affiliationaddress = "Michigan State Univ, Computer Science Dep, East Lansing, MI, USA", annote = "buffer management for indexes.", classification = "723; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Experimentation; Measurement; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- algorithms; database systems; experimentation; graph-theoretic models, query optimization processing TODS, algorithms; mathematical techniques --- graph theory; measurement; performance; relational database accesses; relational join; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf G.2.2}: Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Graph algorithms.", } @Article{Larson:1985:LHO, author = "Per-{\AA}ke Larson", title = "Linear Hashing with Overflow-Handling by Linear Probing", journal = j-TODS, volume = "10", number = "1", pages = "75--89", month = mar, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-1/p75-larson/p75-larson.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-1/p75-larson/; http://www.acm.org/pubs/toc/Abstracts/tods/3324.html", abstract = "Linear hashing is a file structure for dynamic files. In this paper, a new, simple method for handling overflow records in connection with linear hashing is proposed. 
The method is based on linear probing and does not rely on chaining. No dedicated overflow area is required. The expansion sequence of linear hashing is modified to improve the performance, which requires changes in the address computation. A new address computation algorithm and an expansion algorithm are given. The performance of the method is studied by simulation. The algorithms for the basic file operations are very simple, and the overall performance is competitive with that of other variants of linear hashing.", acknowledgement = ack-nhfb, affiliation = "Univ of Waterloo, Dep of Computer Science, Waterloo, Ont, Can", affiliationaddress = "Univ of Waterloo, Dep of Computer Science, Waterloo, Ont, Can", annote = "New algorithm for files that grow and shrink dynamically; the overflow records of a full page are directed to the next page of a group; the introduction of five groups and the backwards split order makes this algorithm better than previous ones.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- algorithms; data processing; database systems; design; dynamic hashing; file organization; linear hashing; measurement; open addressing, algorithms; performance", review = "ACM CR 8512-1134", subject = "{\bf E.2}: Data, DATA STORAGE REPRESENTATIONS, Hash-table representations. {\bf D.2.2}: Software, SOFTWARE ENGINEERING, Tools and Techniques, Decision tables. 
{\bf E.5}: Data, FILES, Organization/structure.", } @Article{Veklerov:1985:ADH, author = "Eugene Veklerov", title = "Analysis of Dynamic Hashing with Deferred Splitting", journal = j-TODS, volume = "10", number = "1", pages = "90--96", month = mar, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-1/p90-veklerov/p90-veklerov.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-1/p90-veklerov/", abstract = "Dynamic hashing with deferred splitting is a file organization scheme which increases storage utilization, as compared to `standard' dynamic hashing. In this scheme, splitting of a bucket is deferred if the bucket is full but its brother can accommodate new records. The performance of the scheme is analyzed. In a typical case the expected storage utilization increases from 69 to 76 percent.", acknowledgement = ack-nhfb, affiliation = "Lawrence Berkeley Lab, Real Time Systems Group, Berkeley, CA, USA", affiliationaddress = "Lawrence Berkeley Lab, Real Time Systems Group, Berkeley, CA, USA", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing; database systems; DEFERRED SPLITTING; DYNAMIC HASHING; File Organization; STORAGE UTILIZATION", } @Article{Palvia:1985:EBS, author = "Prashant Palvia", title = "Expressions for Batched Searching of Sequential and Hierarchical Files", journal = j-TODS, volume = "10", number = "1", pages = "97--106", month = mar, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-1/p97-palvia/p97-palvia.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-1/p97-palvia/; http://www.acm.org/pubs/toc/Abstracts/tods/3326.html", abstract = "Batching yields significant savings in access costs in sequential, tree-structured, and random files. A direct and simple expression is developed for computing the average number of records\slash pages accessed to satisfy a batched query of a sequential file. The advantages of batching for sequential and random files are discussed. A direct equation is provided for the number of nodes accessed in unbatched queries of hierarchical files. An exact recursive expression is developed for node accesses in batched queries of hierarchical files. In addition to the recursive relationship, good, closed-form upper- and lower-bound approximations are provided for the case of batched queries of hierarchical files.", acknowledgement = ack-nhfb, affiliation = "Temple Univ, Dep of Computer \& Information Sciences, Philadelphia, PA, USA", affiliationaddress = "Temple Univ, Dep of Computer \& Information Sciences, Philadelphia, PA, USA", classification = "723; 901", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "batched searching; database systems; design; hierarchical files; information science --- information retrieval; sequential files, performance; theory", subject = "{\bf E.5}: Data, FILES, Organization/structure. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Search process.", } @Article{Bever:1985:DHS, author = "Martin Bever and Peter C. Lockemann", title = "Database Hosting in Strongly-Typed Programming Languages", journal = j-TODS, volume = "10", number = "1", pages = "107--126", month = mar, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-1/p107-bever/p107-bever.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-1/p107-bever/; http://www.acm.org/pubs/toc/Abstracts/tods/3327.html", abstract = "Database system support has become an essential part of many computer applications, which have extended beyond the more traditional commercial applications to, among others, engineering applications. Correspondingly, application programming with the need to access databases has progressively shifted to scientifically oriented languages.\par Modern developments in these languages are characterized by advanced mechanisms for the liberal declaration of data types, for type checking, and facilities for modularization of large programs. 
The present paper examines how a DBMS can be accessed from such a language in a way that conforms to its syntax and utilizes its type-checking facilities, without modifying the language specification itself, and hence its compilers. The basic idea is to rely on facilities for defining modules as separately compilable units, and to use these to declare user-defined abstract data types.\par The idea is demonstrated by an experiment in which a specific DBMS (ADABAS) is hosted in the programming language (LIS). The paper outlines a number of approaches and their problems, shows how to embed the DML into LIS, and how a more user-oriented DML can be provided in LIS.", acknowledgement = ack-nhfb, acmcrnumber = "8707-597", affiliation = "Univ Karlsruhe, Inst fuer Informatik, Karlsruhe, West Ger", affiliationaddress = "Univ Karlsruhe, Inst fuer Informatik, Karlsruhe, West Ger", annote = "ADABAS is the experimental target system and the language is LIS.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming languages; database hosting, design; database systems; languages; parameterized data types; schema mapping; strongly-typed programming languages", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML). {\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Abstract data types. {\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Data types and structures. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. 
{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data description languages (DDL).", } @Article{Chen:1985:AAS, author = "Wen Chin Chen and Jeffrey Scott Vitter", title = "Addendum to: {``Analysis of Some New Variants of Coalesced Hashing''} [{ACM} Trans. Database Systems {\bf 9} (1984), no. 4, 616--645]", journal = j-TODS, volume = "10", number = "1", pages = "127--127", month = mar, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P10", MRnumber = "794 552", bibsource = "Database/Graefe.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Vitter:1985:EIO, author = "Jeffrey Scott Vitter", title = "An Efficient {I/O} Interface for Optical Disks", journal = j-TODS, volume = "10", number = "2", pages = "129--162", month = jun, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-2/p129-vitter/p129-vitter.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-2/p129-vitter/; http://www.acm.org/pubs/toc/Abstracts/tods/3862.html", abstract = "We introduce the notion of an I/O interface for optical digital (write-once) disks, which is quite different from earlier research. The purpose of an I/O interface is to allow existing operating systems and application programs that use magnetic disks to use optical disks instead, with minimal change. We define what it means for an I/O interface to be disk-efficient. 
We demonstrate a practical disk-efficient I/O interface and show that its I/O performance in many cases is optimum, up to a constant factor, among all disk-efficient interfaces. The interface is most effective for applications that are not update-intensive. An additional capability is a built-in history mechanism that provides software support for accessing previous versions of records. Even if not implemented, the I/O interface can be used as a programming tool to develop efficient special purpose applications for use with optical disks.", acknowledgement = ack-nhfb, affiliation = "Brown Univ, Dep of Computer Science, Providence, RI, USA", affiliationaddress = "Brown Univ, Dep of Computer Science, Providence, RI, USA", annote = "An I/O interface supports basic update operations such as insert write and delete on the block is proposed. Index techniques for erasable media (Btree is assumed in this paper) can be implemented on this interface. Versions of a block is stored as an allocation tree on an optical disk, which is an efficient implementation of the pointer fill-in method. Contents of a version of a block is represented by an offset tree. Theoretical lower bound of these operations is evaluated. This paper assumes that appending into existing block is possible on optical disk.", classification = "722; 741", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer interfaces; data storage, optical; design; I/O interface; optical disks, algorithms; performance; theory", subject = "{\bf D.4.2}: Software, OPERATING SYSTEMS, Storage Management, Secondary storage. {\bf D.4.2}: Software, OPERATING SYSTEMS, Storage Management, Allocation/deallocation strategies. {\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, Access methods. 
{\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, File organization. {\bf E.1}: Data, DATA STRUCTURES, Trees. {\bf E.2}: Data, DATA STORAGE REPRESENTATIONS, Hash-table representations. {\bf E.2}: Data, DATA STORAGE REPRESENTATIONS, Linked representations. {\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems, Sorting and searching. {\bf G.2.1}: Mathematics of Computing, DISCRETE MATHEMATICS, Combinatorics, Combinatorial algorithms. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods.", } @Article{Schkolnick:1985:ECU, author = "M. Schkolnick and P. Tiberio", title = "Estimating the Cost of Updates in a Relational Database", journal = j-TODS, volume = "10", number = "2", pages = "163--179", month = jun, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-2/p163-schkolnick/p163-schkolnick.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-2/p163-schkolnick/; http://www.acm.org/pubs/toc/Abstracts/tods/3863.html", abstract = "In this paper, cost formulas are derived for the updates of data and indexes in a relational database. The costs depend on the data scan type and the predicates involved in the update statements. 
We show that update costs have a considerable influence, both in the context of the physical database design problem and in access path selection in query optimization for relational DBMSs.", acknowledgement = ack-nhfb, affiliation = "IBM Research Lab, San Jose, CA, USA", affiliationaddress = "IBM Research Lab, San Jose, CA, USA", annote = "tradeoff by a given index query cost against update cost.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "costs; database systems; measurement; performance; query optimization; relational databases; update costs, design", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Yu:1985:ARC, author = "C. T. Yu and Cheing-Mei Suen and K. Lam and M. K. Siu", title = "Adaptive Record Clustering", journal = j-TODS, volume = "10", number = "2", pages = "180--204", month = jun, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-2/p180-yu/p180-yu.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-2/p180-yu/; http://www.acm.org/pubs/toc/Abstracts/tods/3861.html", abstract = "An algorithm for record clustering is presented. It is capable of detecting sudden changes in users' access patterns and then suggesting an appropriate assignment of records to blocks. 
It is conceptually simple, highly intuitive, does not need to classify queries into types, and avoids collecting individual query statistics. Experimental results indicate that it converges rapidly; its performance is about 50 percent better than that of the total sort method, and about 100 percent better than that of randomly assigning records to blocks.", acknowledgement = ack-nhfb, affiliation = "Univ of Illinois at Chicago Circle, Dep of Electrical Engineering \& Computer Science, Chicago, IL, USA", affiliationaddress = "Univ of Illinois at Chicago Circle, Dep of Electrical Engineering \& Computer Science, Chicago, IL, USA", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Experimentation; Measurement; Performance; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "adaptive record clustering; algorithms; computer programming --- algorithms; database systems; experimentation; file organization; measurement; performance; physical database design; probabilistic retrieval, CTYU TODS; theory; verification, data processing", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design. {\bf E.5}: Data, FILES, Organization/structure. {\bf H.2.m}: Information Systems, DATABASE MANAGEMENT, Miscellaneous. 
{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods.", } @Article{Katoh:1985:CTS, author = "Naoki Katoh and Toshihide Ibaraki and Tiko Kameda", title = "Cautious Transaction Schedulers with Admission Control", journal = j-TODS, volume = "10", number = "2", pages = "205--229", month = jun, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-2/p205-katoh/p205-katoh.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-2/p205-katoh/; http://www.acm.org/pubs/toc/Abstracts/tods/3860.html", abstract = "We propose a new class of schedulers, called {\em cautious schedulers}, that grant an input request if it will not necessitate any rollback in the future. In particular, we investigate cautious WRW-schedulers that output schedules in class WRW only. Class WRW consists of all schedules that are serializable, while preserving the write-read and read-write conflict, and is the largest polynomially {\em recognizable\/} subclass of serializable schedules currently known. It is shown in this paper, however, that cautious WRW-{\em scheduling\/} is, in general, NP-complete. Therefore, we introduce a special type ({\em type 1R\/}) of transaction, which consists of no more than one read step (an indivisible set of read operations) followed by multiple write steps. It is shown that cautious WRW-scheduling can be performed efficiently if all transactions are of type 1R and if {\em admission control\/} can be exercised. 
Admission control rejects a transaction unless its first request is immediately grantable.", acknowledgement = ack-nhfb, affiliation = "Kobe Univ of Commerce, Dep of Management Science, Kobe, Japan", affiliationaddress = "Kobe Univ of Commerce, Dep of Management Science, Kobe, Japan", annote = "serializability control for predefined transaction sequences.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; database systems; scheduling; serializability; transaction scheduler, algorithms", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Albano:1985:GST, author = "Antonio Albano and Luca Cardelli and Renzo Orsini", title = "{Galileo}: a Strongly-Typed, Interactive Conceptual Language", journal = j-TODS, volume = "10", number = "2", pages = "230--260", month = jun, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/Functional.bib; Object/Nierstrasz.bib", note = "Also published in \cite{Zdonik:1990:ROO}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-2/p230-albano/p230-albano.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-2/p230-albano/; http://www.acm.org/pubs/toc/Abstracts/tods/3859.html", abstract = "Galileo, a programming language for database applications, is presented. 
Galileo is a strongly-typed, interactive programming language designed specifically to support semantic data model features (classification, aggregation, and specialization), as well as the abstraction mechanisms of modern programming languages (types, abstract types, and modularization). The main contributions of Galileo are (a) a flexible type system to model database structure and semantic integrity constraints; (b) the inclusion of type hierarchies to support the specialization abstraction mechanisms of semantic data models; (c) a modularization mechanism to structure data and operations into interrelated units; (d) the integration of abstraction mechanisms into an expression-based language that allows interactive use of the database without resorting to a new stand-alone query language.\par Galileo will be used in the immediate future as a tool for database design and, in the long term, as a high-level interface for DBMSs.", acknowledgement = ack-nhfb, affiliation = "Univ di Pisa, Dipartimento di Informatica, Pisa, Italy", affiliationaddress = "Univ di Pisa, Dipartimento di Informatica, Pisa, Italy", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming languages; data description languages; data manipulation; database systems; functional abstract data types; Galileo; languages; olit-oopl Galileo; query languages, design", subject = "{\bf D.3.2}: Software, PROGRAMMING LANGUAGES, Language Classifications, GALILEO. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data description languages (DDL). {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML). 
{\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Abstract data types. {\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Data types and structures. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema.", } @Article{Huang:1985:HBT, author = "Shou-Hsuan Stephen Huang", title = "Height-balanced Trees of Order $ (\beta, \gamma, \delta) $", journal = j-TODS, volume = "10", number = "2", pages = "261--284", month = jun, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P05", MRnumber = "801 578", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-2/p261-huang/p261-huang.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-2/p261-huang/", abstract = "We study restricted classes of B-trees, called $ H(\beta, \gamma, \delta) $ trees. A class is defined by three parameters: $ \beta $, the size of a node; $ \gamma $, the minimal number of grandsons a node must have; and $ \delta $, the minimal number of leaves bottom nodes must have. This generalizes the brother condition of 2-3 brother trees in a uniform way to B-trees of higher order. The class of B-trees of order m is obtained by choosing $ \beta = m $, $ \gamma = (m / 2)^2 $, and $ \delta = m / 2 $. An algorithm to construct H-trees for any given number of keys is given in Section 1. Insertion and deletion algorithms are given in Section 2. The costs of these algorithms increase smoothly as the parameters are increased. Furthermore, it is proved that the insertion can be done in time $ O(?? + \log N) $, where $N$ is the number of nodes in the tree. 
Deletion can also be accomplished without reconstructing the entire tree. Properties of H-trees are given in Section 3. It is shown that the height of H-trees decreases as $ \gamma $ increases, and the storage utilization increases significantly as $ \delta $ increases. Finally, comparisons with other restricted classes of B-trees are given in Section 4 to show the attractiveness of H-trees.", acknowledgement = ack-nhfb, affiliation = "Univ of Houston, Dep of Computer Science, Houston, TX, USA", affiliationaddress = "Univ of Houston, Dep of Computer Science, Houston, TX, USA", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; b-trees; compact b-trees; computer programming --- algorithms; data processing; data structures; dense multiway trees; height-balanced trees; performance", subject = "{\bf E.1}: Data, DATA STRUCTURES, Trees. 
{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods.", } @Article{Piwowarski:1985:CBS, author = "Marek Piwowarski", title = "Comments on Batched Searching of Sequential and Tree-Structured Files", journal = j-TODS, volume = "10", number = "2", pages = "285--287", month = jun, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See \cite{Shneiderman:1976:BSS,Batory:1982:UMP}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-2/p285-piwowarski/p285-piwowarski.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-2/p285-piwowarski/; http://www.acm.org/pubs/toc/Abstracts/tods/214294.html", abstract = "Exact formulas for the expected cost savings from batching requests against two types of j-ary trees are given. Approximate expressions are also presented.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "performance", subject = "{\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization. {\bf E.1}: Data, DATA STRUCTURES, Trees. {\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems, Sorting and searching.", } @Article{Ullman:1985:ILQ, author = "Jeffrey D. 
Ullman", title = "Implementation of Logical Query Languages for Databases", journal = j-TODS, volume = "10", number = "3", pages = "289--321", month = sep, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Ai/nonmono.bib; Ai/prolog.1.bib; Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Sep., YEAR $=$ 1985", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-3/p289-ullman/p289-ullman.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-3/p289-ullman/; http://www.acm.org/pubs/toc/Abstracts/tods/3980.html", abstract = "We examine methods of implementing queries about relational databases in the case where these queries are expressed in first-order logic as a collection of Horn clauses. Because queries may be defined recursively, straightforward methods of query evaluation do not always work, and a variety of strategies have been proposed to handle subsets of recursive queries. We express such query evaluation techniques as ``capture rules'' on a graph representing clauses and predicates. One essential property of capture rules is that they can be applied independently, thus providing a clean interface for query-evaluation systems that use several different strategies in different situations. Another is that there be an efficient test for the applicability of a given rule. 
We define basic capture rules corresponding to application of operators from relational algebra, a top-down capture rule corresponding to ``backward chaining,'' that is, repeated resolution of goals, a bottom-up rule, corresponding to ``forward chaining,'' where we attempt to deduce all true facts in a given class, and a ``sideways'' rule that allows us to pass results from one goal to another.", acknowledgement = ack-nhfb, affiliation = "Stanford Univ, Dep of Computer Science, Stanford, CA, USA", affiliationaddress = "Stanford Univ, Dep of Computer Science, Stanford, CA, USA", classification = "723", conference = "Sel Pap from the 1985 ACM SIGMOD Conf", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming languages; database systems; Horn clauses; languages; logical query languages; relational databases, Prolog, algorithms; theory; verification", meetingaddress = "Austin, TX, USA", meetingdate = "May 28--31 1985", meetingdate2 = "05/28--31/85", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Logic programming. 
{\bf I.2.4}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods, Predicate logic.", } @Article{Anonymous:1985:SPA, author = "Anonymous", title = "Selected Papers from the 1985 {ACM SIGMOD Conference}", journal = j-TODS, volume = "10", number = "3", pages = "289--346", month = sep, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Nov 10 07:59:49 1998", bibsource = "Compendex database; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This issue contains 2 conference papers. The topics covered are: logical query languages for databases; and modeling concepts for VLSI CAD objects.", acknowledgement = ack-nhfb, classification = "714; 723", conference = "Selected Papers from the 1985 ACM SIGMOD Conference.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", journalabr = "ACM Transactions on Database Systems", keywords = "CAD; database systems; design automation; integrated circuits, VLSI --- computer aided design; logical query languages; relational databases", meetingaddress = "Austin, TX, USA", sponsor = "ACM, Special Interest Group on Management of Data, New York, NY, USA", } @Article{Batory:1985:MCV, author = "D. S. 
Batory and Won Kim", title = "Modeling Concepts for {VLSI CAD} Objects", journal = j-TODS, volume = "10", number = "3", pages = "322--346", month = sep, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: ACM-SIGMOD 1985.", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-3/p322-batory/p322-batory.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-3/p322-batory/; http://www.acm.org/pubs/toc/Abstracts/tods/4018.html", abstract = "VLSI CAD applications deal with design objects that have an interface description and an implementation description. Versions of design objects have a common interface but differ in their implementations. A molecular object is a modeling construct which enables a database entity to be represented by two sets of heterogeneous records, one set describes the object's interface and the other describes its implementation. Thus a reasonable starting point for modeling design objects is to begin with the concept of molecular objects.\par In this paper, we identify modeling concepts that are fundamental to capturing the semantics of VLSI CAD design objects and versions in terms of molecular objects. A provisional set of user operations on design objects, consistent with these modeling concepts, is also defined. 
The modeling framework that we present has been found useful for investigating physical storage techniques and change notification problems in version control.", acknowledgement = ack-nhfb, affiliation = "Univ of Texas at Austin, Dep of Computer Sciences, Austin, TX, USA", affiliationaddress = "Univ of Texas at Austin, Dep of Computer Sciences, Austin, TX, USA", classification = "714; 723", conference = "Sel Pap from the 1985 ACM SIGMOD Conf", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "CAD; Computer Aided Design; database systems; design automation; integrated circuits, VLSI; languages; relational databases; storage techniques, design; version control", meetingaddress = "Austin, TX, USA", meetingdate = "May 28--31 1985", meetingdate2 = "05/28--31/85", subject = "{\bf B.7.1}: Hardware, INTEGRATED CIRCUITS, Types and Design Styles, VLSI (very large scale integration).", } @Article{Subieta:1985:SQL, author = "Kazimierz Subieta", title = "Semantics of Query Languages for Network Databases", journal = j-TODS, volume = "10", number = "3", pages = "347--394", month = sep, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-3/p347-subieta/p347-subieta.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-3/p347-subieta/; http://www.acm.org/pubs/toc/Abstracts/tods/214293.html", abstract = "Semantics determines the meaning of language constructs; hence it says much more than syntax does about implementing the language. 
The main purpose of this paper is a formal presentation of the meaning of basic language constructs employed in many database languages (sublanguages). Therefore, stylized query languages SSL (Sample Selection Language) and J (Joins) are introduced, wherein most of the typical entries present in other query languages are collected. The semantics of SSL and J are defined by means of the denotational method and explained informally. In SSL and J, four types of expressions are introduced: a selector (denotes a set of addresses), a term (denotes a set of values), a formula (denotes a truth value), and a join (denotes a set of n-tuples of addresses or values). In many cases alternative semantics are given and discussed. In order to obtain more general properties of the proposed languages, a new database access model is introduced, intended to be a tool for the description of the logical access paths to data. In particular, the access paths of the network and relational models can be described. SSL and J expressions may be addressed to both data structures. In the case of the relational model, expressions of J are similar to SQL or QUEL statements. Thus J may be considered a generalization of relational query languages for the network model. Finally, a programming language, based on SSL and J, is outlined, and the issues of SSL and J implementation are considered.", acknowledgement = ack-nhfb, affiliation = "Polish Acad of Sciences, Inst of Computer Science, Warsaw, Pol", affiliationaddress = "Polish Acad of Sciences, Inst of Computer Science, Warsaw, Pol", classification = "723", conference = "Sel Pap from the 1985 ACM SIGMOD Conf", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming languages; data manipulation languages; database systems; denotational semantics; query languages; query optimization, languages; theory", meetingaddress = "Austin, TX, USA", meetingdate = "May 28--31 1985", meetingdate2 = "05/28--31/85", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML).", } @Article{Liew:1985:DDP, author = "Chong K. Liew and Uinam J. Choi and Chung J. Liew", title = "A Data Distortion by Probability Distribution", journal = j-TODS, volume = "10", number = "3", pages = "395--411", month = sep, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-3/p395-liew/p395-liew.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-3/p395-liew/; http://www.acm.org/pubs/toc/Abstracts/tods/4017.html", abstract = "This paper introduces data distortion by probability distribution, a probability distortion that involves three steps. The first step is to identify the underlying density function of the original series and to estimate the parameters of this density function. The second step is to generate a series of data from the estimated density function. And the final step is to map and replace the generated series for the original one. Because it is replaced by the distorted data set, probability distortion guards the privacy of an individual belonging to the original data set. 
At the same time, the probability distorted series provides asymptotically the same statistical properties as those of the original series, since both are under the same distribution. Unlike conventional point distortion, probability distortion is difficult to compromise by repeated queries, and provides a maximum exposure for statistical analysis.", acknowledgement = ack-nhfb, affiliation = "Univ of Oklahoma, Norman, OK, USA", affiliationaddress = "Univ of Oklahoma, Norman, OK, USA", annote = "analysis of pollution technique.", classification = "723", conference = "Sel Pap from the 1985 ACM SIGMOD Conf", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Security", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data distortion; database systems; probability; probability distortion; security; statistical databases, statistical security; TODS, algorithms", meetingaddress = "Austin, TX, USA", meetingdate = "May 28--31 1985", meetingdate2 = "05/28--31/85", subject = "{\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf H.2.7}: Information Systems, DATABASE MANAGEMENT, Database Administration. {\bf G.3}: Mathematics of Computing, PROBABILITY AND STATISTICS, Statistical computing. {\bf G.3}: Mathematics of Computing, PROBABILITY AND STATISTICS.", } @Article{Tay:1985:LPC, author = "Y. C. 
Tay and Nathan Goodman and Rajan Suri", title = "Locking Performance in Centralized Databases", journal = j-TODS, volume = "10", number = "4", pages = "415--462", month = dec, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-4/p415-tay/p415-tay.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-4/p415-tay/; http://www.acm.org/pubs/toc/Abstracts/tods/4880.html", abstract = "An analytic model is used to study the performance of dynamic locking. The analysis uses only the steady-state average values of the variables. The solution to the model is given by a cubic, which has exactly one valid root for the range of parametric values that is of interest. The model's predictions agree well with simulation results for transactions that require up to twenty locks. The model separates data contention from resource contention, thus facilitating an analysis of their separate effects and their interaction. It shows that systems with a particular form of nonuniform access, or with shared locks, are equivalent to systems with uniform access and only exclusive locks.\par Blocking due to conflicts is found to impose an upper bound on transaction throughput; this fact leads to a rule of thumb on how much data contention should be permitted in a system. Throughput can exceed this bound if a transaction is restarted whenever it encounters a conflict, provided restart costs and resource contention are low. It can also be exceeded by making transactions predeclare their locks. Raising the multiprogramming level to increase throughput also raises the number of restarts per completion. 
Transactions should minimize their lock requests, because data contention is proportional to the square of the number of requests. The choice of how much data to lock at a time depends on which part of a general granularity curve the system sees.", acknowledgement = ack-nhfb, affiliation = "Natl Univ of Singapore, Singapore, Singapore", affiliationaddress = "Natl Univ of Singapore, Singapore, Singapore", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Measurement; Performance; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; data contention; database locking; database systems; measurement; performance; resource contention, algorithms; theory; verification", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Deadlock avoidance. {\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf C.2.2}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols. {\bf C.2.1}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design, Centralized networks.", } @Article{Batory:1985:MSA, author = "D. S. 
Batory", title = "Modeling the Storage Architectures of Commercial Database Systems", journal = j-TODS, volume = "10", number = "4", pages = "463--528", month = dec, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-4/p463-batory/p463-batory.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-4/p463-batory/; http://www.acm.org/pubs/toc/Abstracts/tods/5392.html", abstract = "Modeling the storage structures of a DBMS is a prerequisite to understanding and optimizing database performance. Previously, such modeling was very difficult because the fundamental role of conceptual-to-internal mappings in DBMS implementations went unrecognized.\par In this paper we present a model of physical databases, called the transformation model, that makes conceptual-to-internal mappings explicit. By exposing such mappings, we show that it is possible to model the storage architectures (i.e., the storage structures and mappings) of many commercial DBMSs in a precise, systematic, and comprehensible way. Models of the INQUIRE, ADABAS, and SYSTEM 2000 storage architectures are presented as examples of the model's utility. \par We believe the transformation model helps bridge the gap between physical database theory and practice. It also reveals the possibility of a technology to automate the development of physical database software.", acknowledgement = ack-nhfb, affiliation = "Univ of Texas at Austin, Austin, TX, USA", affiliationaddress = "Univ of Texas at Austin, Austin, TX, USA", annote = "considers ADABAS, INQUIRE, SYSTEM2000 in depth. Classification of linksets. 
modeling storage methods of Inquire, ADABAS, and System 2000.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Documentation", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing --- data structures; database systems; documentation; storage architectures, design", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf E.5}: Data, FILES, Organization/structure. {\bf H.2.m}: Information Systems, DATABASE MANAGEMENT, Miscellaneous.", } @Article{Agrawal:1985:ICC, author = "Rakesh Agrawal and David J. DeWitt", title = "Integrated Concurrency Control and Recovery Mechanisms: Design and Performance Evaluation", journal = j-TODS, volume = "10", number = "4", pages = "529--564", month = dec, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-4/p529-agrawal/p529-agrawal.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-4/p529-agrawal/; http://www.acm.org/pubs/toc/Abstracts/tods/4958.html", abstract = "In spite of the wide variety of concurrency control and recovery mechanisms proposed during the past decade, the behavior and the performance of various concurrency control and recovery mechanisms remain largely not well understood. In addition, although concurrency control and recovery mechanisms are intimately related, the interaction between them has not been adequately explored. 
In this paper, we take a unified view of the problems associated with concurrency control and recovery for transaction-oriented multiuser centralized database management systems, and we present several integrated mechanisms. We then develop analytical models to study the behavior and compare the performance of these integrated mechanisms, and we present the results of our performance evaluation.", acknowledgement = ack-nhfb, affiliation = "AT\&T Bell Lab, Murray Hill, NJ, USA", affiliationaddress = "AT\&T Bell Lab, Murray Hill, NJ, USA", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; database systems; design; measurement; performance; recovery mechanisms; transaction processing, algorithms", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Deadlock avoidance. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management.", } @Article{Borgida:1985:LFF, author = "Alexander Borgida", title = "Language Features for Flexible Handling of Exceptions in Information Systems", journal = j-TODS, volume = "10", number = "4", pages = "565--603", month = dec, year = "1985", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/database.bib; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: Rutgers Un., TR-LCSR-70, rev. Mar. 
1985.", URL = "http://www.acm.org/pubs/articles/journals/tods/1985-10-4/p565-borgida/p565-borgida.pdf; http://www.acm.org/pubs/citations/journals/tods/1985-10-4/p565-borgida/; http://www.acm.org/pubs/toc/Abstracts/tods/4995.html", abstract = "An exception-handling facility suitable for languages used to implement database-intensive information systems is presented. Such a mechanism facilitates the development and maintenance of more flexible software systems by supporting the abstraction of details concerning special or abnormal occurrences. The type constraints imposed by the schema as well as various semantic integrity assertions are considered to be normalcy conditions, and the key contribution of this work is to allow exceptions to these constraints to persist. To achieve this, solutions are proposed to a range of problems, including sharing and computing with exceptional information, exception handling by users, the logic of constraints with exceptions, and implementation issues. The use of exception handling in dealing with null values, estimates, and measurement is also illustrated.", acknowledgement = ack-nhfb, affiliation = "Rutgers Univ, Dep of Computer Science, New Brunswick, NJ, USA", affiliationaddress = "Rutgers Univ, Dep of Computer Science, New Brunswick, NJ, USA", annote = "Adding exception handling to database systems to deal with unusual, unknown, or otherwise exceptional attribute values. A semantic extension that may inspire KBMSers. I have a report in my office on the possibilities of this approach, by Alex Borgida of Rutgers. It's very readable, and it may inspire someone to cook up such a scheme of his or her own for Naxos, thesis, or whatever. -----Marianne W. W.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming languages; database systems; exception handling; languages; semantic integrity; theory; type constraints, design; verification", subject = "{\bf D.2.5}: Software, SOFTWARE ENGINEERING, Testing and Debugging, Error handling and recovery. {\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data description languages (DDL). {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML).", } @Article{Hagmann:1986:PAS, author = "Robert Brian Hagmann and Domenico Ferrari", title = "Performance Analysis of Several Back-End Database Architectures", journal = j-TODS, volume = "11", number = "1", pages = "1--26", month = mar, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-1/p1-hagmann/p1-hagmann.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-1/p1-hagmann/; http://www.acm.org/pubs/toc/Abstracts/tods/5242.html", abstract = "The growing acceptance of database systems makes their performance increasingly more important. One way to gain performance is to off-load some of the functions of the database system to a back-end computer. The problem is what functions should be off-loaded to maximize the benefits of distributed processing. 
\par Our approach to this problem consisted of constructing several variants of an existing relational database system, INGRES, that partition the database system software into two parts, and assigning these two parts to two computers connected by a local area network. For the purposes of this experiment, six different variants of the database software were constructed to test the six most interesting functional subdivisions. Each variant was then benchmarked using two different databases and query streams. The communication medium and the communication software were also benchmarked to measure their contribution to the performance of each configuration.\par Combining the database and network measurement results, various conclusions were reached about the viability of the configurations, the desirable properties of the communications mechanisms to be used, the operating system interface and overhead, and the performance of the database system. The variants to be preferred depend on the hardware technology, operating system features, database system internal structure, and network software overhead.", acknowledgement = ack-nhfb, affiliation = "Univ of California, Berkeley, CA, USA", affiliationaddress = "Univ of California, Berkeley, CA, USA", annote = "an experimental methodology using INGRES.", classification = "722; 723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Experimentation; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "back-end database architectures; computer networks --- local networks; computer systems, digital --- distributed; database systems; experimentation; Ingres database system; measurement; performance; relational databases, hardware support database machine TODS, design", subject = "{\bf H.2.6}: Information Systems, DATABASE MANAGEMENT, Database Machines. 
{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS. {\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, INGRES.", } @Article{Garcia-Molina:1986:ABA, author = "H{\'e}ctor Garc{\'\i}a-Molina and Frank Pittelli and Susan Davidson", title = "Applications of {Byzantine} Agreement in Database Systems", journal = j-TODS, volume = "11", number = "1", pages = "27--47", month = mar, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-1/p27-molina/p27-molina.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-1/p27-molina/; http://www.acm.org/pubs/toc/Abstracts/tods/5243.html", abstract = "In this paper we study when and how a Byzantine agreement protocol can be used in general-purpose database management systems. We present an overview of the failure model used for Byzantine agreement, and of the protocol itself. We then present correctness criteria for database processing in this failure environment and discuss strategies for satisfying them. In doing this, we present new failure models for input\slash output nodes and study ways to distribute input transactions to processing nodes under these models. Finally, we investigate applications of Byzantine agreement protocols in the more common failure environment where processors are assumed to halt after a failure.", acknowledgement = ack-nhfb, affiliation = "Princeton Univ, Princeton, NJ, USA", affiliationaddress = "Princeton Univ, Princeton, NJ, USA", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; Byzantine agreement protocol; data processing; database systems; distributed; failure models; fault tolerance; reliability", subject = "{\bf D.4.5}: Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf C.2.2}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases.", } @Article{Segev:1986:OJO, author = "Arie Segev", title = "Optimization of Join Operations in Horizontally Partitioned Database Systems", journal = j-TODS, volume = "11", number = "1", pages = "48--80", month = mar, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-1/p48-segev/p48-segev.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-1/p48-segev/; http://www.acm.org/pubs/toc/Abstracts/tods/5241.html", abstract = "This paper analyzes the problem of joining two horizontally partitioned relations in a distributed database system. Two types of semijoin strategies are introduced, local and remote. Local semijoins are performed at the site of the restricted relation (or fragment), and remote semijoins can be performed at an arbitrary site. A mathematical model of a semijoin strategy for the case of remote semijoins is developed, and lower bounding and heuristic procedures are proposed. The results of computational experiments are reported. 
The experiments include an analysis of the heuristics' performance relative to the lower bounds, sensitivity analysis, and error analysis. These results reveal a good performance of the heuristic procedures, and demonstrate the benefit of using semijoin operations to reduce the size of fragments prior to their transmission. The algorithms for the case of remote semijoins were found to be superior to the algorithms for the case of local semijoins. In addition, we found that the estimation accuracy of the selectivity factors has a significant effect on the incurred communication cost.", acknowledgement = ack-nhfb, affiliation = "Univ of California, Berkeley, CA, USA", affiliationaddress = "Univ of California, Berkeley, CA, USA", classification = "723; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- algorithms; database systems; distributed; horizontally partitioned database systems, query processing optimization tods; join operations; mathematical models; optimization", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf G.2.2}: Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Trees. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf G.2.1}: Mathematics of Computing, DISCRETE MATHEMATICS, Combinatorics, Combinatorial algorithms.", } @Article{Gyssens:1986:CJD, author = "Marc Gyssens", title = "On the Complexity of Join Dependencies", journal = j-TODS, volume = "11", number = "1", pages = "81--108", month = mar, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "87g:68011", MRreviewer = "J. 
Paredaens", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-1/p81-gyssens/p81-gyssens.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-1/p81-gyssens/; http://www.acm.org/pubs/toc/Abstracts/tods/5237.html", abstract = "In [10] a method is proposed for decomposing join dependencies (jds) in a relational database using the notion of a hinge. This method was subsequently studied in [11] and [12]. We show how the technique of decomposition can be used to make integrity checking more efficient. It turns out that it is important to find a decomposition that minimizes the number of edges of its largest element. We show that the decompositions obtained with the method described in [10] are optimal in this respect. This minimality criterion leads to the definition of the {\em degree of cyclicity}, which allows us to classify jds and leads to the notion of {\em n-cyclicity}, of which acyclicity is a special case for n = 2. We then show that, for a fixed value of n (which may be greater than 2), integrity checking can be performed in polynomial time provided we restrict ourselves to {\em n-cyclic\/} jds. Finally, we generalize a well-known characterization for acyclic jds by proving that n-cyclicity is equivalent to ``n-wise consistency implies global consistency.'' As a consequence, consistency checking can be performed in polynomial time if we restrict ourselves to n-cyclic jds, for a fixed value of n, not necessarily equal to 2.", acknowledgement = ack-nhfb, affiliation = "Univ of Antwerp, Antwerp, Belg", affiliationaddress = "Univ of Antwerp, Antwerp, Belg", classification = "723; 921", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "CYCLICITY; database systems; DECOMPOSITION; JOIN DEPENDENCIES; MATHEMATICAL TECHNIQUES --- Graph Theory; Relational", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms. {\bf G.2.2}: Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Graph algorithms. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf G.2.2}: Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Trees.", } @Article{Sacco:1986:FTE, author = "Giovanni Maria Sacco", title = "Fragmentation: a technique for Efficient Query Processing", journal = j-TODS, volume = "11", number = "2", pages = "113--133", month = jun, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: University of Torino, TR., Aug. 1983.", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-2/p113-sacco/p113-sacco.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-2/p113-sacco/; http://www.acm.org/pubs/toc/Abstracts/tods/5638.html", abstract = "A `divide and conquer' strategy to compute natural joins by sequential scans on unordered relations is described. This strategy is shown to always be better than merging scans when both relations must be sorted before joining, and generally better in practical cases when only the largest relation must be sorted.", acknowledgement = ack-nhfb, affiliation = "Univ di Torino", affiliationaddress = "Turin, Italy", annote = "Join by hashing: Create fragments by hashing, as many fragments as buffers can be allocated in memory. 
Then repeat that for the other relation. Then do a nested unsorted join, as Kim, W. 1980, on the fragment pairs.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Economics; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- Algorithms; database systems; divide-and-conquer algorithms; economics; fragmentation; natural joins, join hash partitioning overflow avoidance recursion parallelism TODS, algorithms; performance; query processing", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Beeri:1986:IAL, author = "Catriel Beeri and Michael Kifer", title = "An Integrated Approach to Logical Design of Relational Database Schemes", journal = j-TODS, volume = "11", number = "2", pages = "134--158", month = jun, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "848 633", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-2/p134-beeri/p134-beeri.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-2/p134-beeri/; http://www.acm.org/pubs/toc/Abstracts/tods/214291.html", abstract = "We propose a new approach to the design of relational database schemes. The main features of the approach are the following:\par A combination of the traditional decomposition and synthesis approaches, thus allowing the use of both functional and multivalued dependencies. 
\par Separation of structural dependencies relevant for the design process from integrity constraints, that is, constraints that do not bear any structural information about the data and which should therefore be discarded at the design stage. This separation is supported by a simple syntactic test filtering out nonstructural dependencies.\par Automatic correction of schemes which lack certain desirable properties.", acknowledgement = ack-nhfb, affiliation = "Hebrew Univ of Jerusalem, Jerusalem, Isr", affiliationaddress = "Hebrew Univ of Jerusalem, Jerusalem, Isr", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "acyclic schemes, design; database systems; decomposition; functional dependencies; multivalued dependencies; relational; synthesis; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms.", } @Article{Mendelson:1986:IIC, author = "Haim Mendelson and Aditya N. 
Saharia", title = "Incomplete Information Costs and Database Design", journal = j-TODS, volume = "11", number = "2", pages = "159--185", month = jun, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-2/p159-mendelson/p159-mendelson.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-2/p159-mendelson/; http://www.acm.org/pubs/toc/Abstracts/tods/5678.html", abstract = "This paper presents a methodology for trading-off the cost of incomplete information against the data-related costs in the design of database systems. It investigates how the usage patterns of the database, defined by the characteristics of information requests presented to it, affect its conceptual design. The construction of minimum-cost answers to information requests for a variety of query types and cost structures is also studied. The resulting costs of incomplete database information are balanced against the data-related costs in the derivation of the optimal design.", acknowledgement = ack-nhfb, affiliation = "Univ of Rochester, Rochester, NY, USA", affiliationaddress = "Univ of Rochester, Rochester, NY, USA", annote = "information value, missing data, decision theory framework, applied to ships in the Mediterranean.", classification = "723; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Economics; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data-related costs, design; database systems; design; economics; incomplete information costs; optimization; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design. 
{\bf H.1.1}: Information Systems, MODELS AND PRINCIPLES, Systems and Information Theory. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval.", } @Article{Ginsburg:1986:CTS, author = "Seymour Ginsburg and Katsumi Tanaka", title = "Computation-Tuple Sequences and Object Histories", journal = j-TODS, volume = "11", number = "2", pages = "186--212", month = jun, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "848 634", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-2/p186-ginsburg/p186-ginsburg.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-2/p186-ginsburg/; http://www.acm.org/pubs/toc/Abstracts/tods/5924.html", abstract = "A record-based, algebraically-oriented model is introduced for describing data for ``object histories'' (with computation), such as checking accounts, credit card accounts, taxes, schedules, and so on. The model consists of sequences of computation tuples defined by a computation-tuple sequence scheme (CSS). The CSS has three major features (in addition to input data): computation (involving previous computation tuples), ``uniform'' constraints (whose satisfaction by a computation-tuple sequence $u$ implies satisfaction by every interval of $u$ ), and specific sequences with which to start the valid computation-tuple sequences. A special type of CSS, called ``local,'' is singled out for its relative simplicity in maintaining the validity of a computation-tuple sequence. A necessary and sufficient condition for a CSS to be equivalent to at least one local CSS is given. 
Finally, the notion of ``local bisimulatability'' is introduced for regarding two CSS as conveying the same information, and two results on local bisimulatability in connection with local CSS are established.", acknowledgement = ack-nhfb, affiliation = "Univ of Southern California, Los Angeles, CA, USA", affiliationaddress = "Univ of Southern California, Los Angeles, CA, USA", annote = "Sequential history tuples and objects with input, computation, and result. Some constraints applied per sequential entry cause satisfaction of global constraints. Temporal issues are very specific.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computation history; data description; database state transitions; database systems; theory; transaction processing, algorithms; verification", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.1.0}: Information Systems, MODELS AND PRINCIPLES, General. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Garg:1986:OPK, author = "Anil K. Garg and C. C. 
Gotlieb", title = "Order-Preserving Key Transformations", journal = j-TODS, volume = "11", number = "2", pages = "213--234", month = jun, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-2/p213-garg/p213-garg.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-2/p213-garg/; http://www.acm.org/pubs/toc/Abstracts/tods/5923.html", abstract = "File organizations based on conventional hash functions provide faster access to the stored records in comparison with tree-like file structures. Tree structures such as {$B^+$}-trees and ISAM do provide for sequential processing, but require considerable storage for the indices. When sequential processing is needed a table that performs an order-preserving transformation on keys can be used. $H$ is an order-preserving key transform if $H(K_1) \geq H(K_2)$, for all keys $K_1 > K_2$. We present methodologies for constructing such key transforms, and illustrate them for some real-life key sets. Storage requirements for the table needed to carry out the transformation are less than those needed for the indices.", acknowledgement = ack-nhfb, affiliation = "Univ of Toronto, Toronto, Ont, Can", affiliationaddress = "Univ of Toronto, Toronto, Ont, Can", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Management; Measurement; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "access methods, algorithms; data processing; database systems; design; dynamic files; file organization; key transformations; management; measurement; order-preserving hashing; performance; theory", subject = "{\bf E.5}: Data, FILES, Organization/structure. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods.", } @Article{Shapiro:1986:JPD, author = "Leonard D. Shapiro", title = "Join Processing in Database Systems with Large Main Memories", journal = j-TODS, volume = "11", number = "3", pages = "239--264", month = sep, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/database.bib; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-3/p239-shapiro/p239-shapiro.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-3/p239-shapiro/; http://www.acm.org/pubs/toc/Abstracts/tods/6315.html", abstract = "We study algorithms for computing the equijoin of two relations in a system with a standard architecture but with large amounts of main memory. Our algorithms are especially efficient when the main memory available is a significant fraction of the size of one of the relations to be joined; but they can be applied whenever there is memory equal to approximately the square root of the size of one relation. We present a new algorithm which is a hybrid of two hash-based algorithms and which dominates the other algorithms we present, including sort-merge. 
Even in a virtual memory environment, the hybrid algorithm dominates all the others we study.\par Finally, we describe how three popular tools to increase the efficiency of joins, namely filters, Babb arrays, and semijoins, can be grafted onto any of our algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; memory query evaluation classical simple hybrid hash joins TODS, algorithms; performance", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General. {\bf H.2.6}: Information Systems, DATABASE MANAGEMENT, Database Machines. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design.", } @Article{Gavish:1986:SQO, author = "Bezalel Gavish and Arie Segev", title = "Set Query Optimization in Distributed Database Systems", journal = j-TODS, volume = "11", number = "3", pages = "265--293", month = sep, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-3/p265-gavish/p265-gavish.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-3/p265-gavish/; http://www.acm.org/pubs/toc/Abstracts/tods/6488.html", abstract = "This paper addresses the problem of optimizing queries that involve set operations (set queries) in a distributed relational database system. A particular emphasis is put on the optimization of such queries in horizontally partitioned database systems. 
A mathematical programming model of the set query problem is developed and its NP-completeness is proved. Solution procedures are proposed and computational results presented. One of the main results of the computational experiments is that, for many queries, the solution procedures are not sensitive to errors in estimating the size of results of set operations.", acknowledgement = ack-nhfb, annote = "The time complexity is NP-complete. Three approximations.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; languages; theory", subject = "{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf G.2.2}: Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Trees. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Lafortune:1986:STM, author = "St{\'e}phane Lafortune and Eugene Wong", title = "A State Transition Model for Distributed Query Processing", journal = j-TODS, volume = "11", number = "3", pages = "294--322", month = sep, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/database.bib; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/des.bib; Misc/Discrete.event.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-3/p294-lafortune/p294-lafortune.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-3/p294-lafortune/; http://www.acm.org/pubs/toc/Abstracts/tods/6460.html", abstract = "A state transition model for the optimization of query processing in a distributed database system is presented. 
The problem is parameterized by means of a state describing the amount of processing that has been performed at each site where the database is located. A state transition occurs each time a new join or semijoin is executed. Dynamic programming is used to compute recursively the costs of the states and the globally optimal solution, taking into account communication and local processing costs. The state transition model is general enough to account for the possibility of parallel processing among the various sites, as well as for redundancy in the database. The model also permits significant reductions of the necessary computations by taking advantage of simple additivity and site-uniformity properties of a cost model, and of clever strategies that improve on the basic dynamic programming algorithm.", acknowledgement = ack-nhfb, bib = "koz", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; performance; theory, Optimization TODS", subject = "{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Lozinskii:1986:POI, author = "Eliezer L. 
Lozinskii", title = "A Problem-Oriented Inferential Database System", journal = j-TODS, volume = "11", number = "3", pages = "323--356", month = sep, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15 (68T20)", MRnumber = "87k:68025", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-3/p323-lozinskii/p323-lozinskii.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-3/p323-lozinskii/; http://www.acm.org/pubs/toc/Abstracts/tods/6419.html", abstract = "Recently developed inferential database systems face some common problems: a very fast growth of search space and difficulties in recognizing inference termination (especially for recursive axioms). These shortcomings stem mainly from the fact that the inference process is usually separated from database operations. A problem-oriented inferential system is described which refers to the database prior to query (or subquery) processing, so that the inference from the very beginning is directed by data relevant to the query. A multiprocessor implementation of the system is presented based on a computer network conforming to database relations and axioms. The system provides an efficient indication of query termination, and is complete in the sense that it produces all correct answers to a query in a finite time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; performance; theory; verification", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. 
{\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Deduction. {\bf C.1.3}: Computer Systems Organization, PROCESSOR ARCHITECTURES, Other Architecture Styles, Data-flow architectures.", } @Article{Osborn:1986:DRD, author = "Sylvia L. Osborn and T. E. Heaven", title = "The Design of a Relational Database System with Abstract Data Types for Domains", journal = j-TODS, volume = "11", number = "3", pages = "357--373", month = sep, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-3/p357-osborn/p357-osborn.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-3/p357-osborn/; http://www.acm.org/pubs/toc/Abstracts/tods/6461.html", abstract = "An extension to the relational model is described in which domains can be arbitrarily defined as abstract data types. Operations on these data types include primitive operations, aggregates, and transformations. It is shown that these operations make the query language complete in the sense of Chandra and Harel. The system has been designed in such a way that new data types and their operations can be defined with a minimal amount of interaction with the database management system.", acknowledgement = ack-nhfb, annote = "Operations on simple objects, operations on aggregates and `transformations' can be defined on relations. It is possible to implement a transitive closure RAD uses the data dictionary. ---Ong, Fogg and Stonebraker.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; languages; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Abstract data types. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages.", } @Article{Gawlick:1986:RIW, author = "Dieter Gawlick", title = "Report on the International Workshop on High-Performance Transaction Systems", journal = j-TODS, volume = "11", number = "4", pages = "375--377", month = dec, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-4/p375-gawlick/p375-gawlick.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-4/p375-gawlick/; http://www.acm.org/pubs/toc/Abstracts/tods/17346.html", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; performance", subject = "{\bf A.0}: General Literature, GENERAL. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability.", } @Article{Mohan:1986:TMR, author = "C. Mohan and B. Lindsay and R. 
Obermarck", title = "Transaction Management in the {R*} Distributed Database Management System", journal = j-TODS, volume = "11", number = "4", pages = "378--396", month = dec, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-4/p378-mohan/p378-mohan.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-4/p378-mohan/; http://www.acm.org/pubs/toc/Abstracts/tods/7266.html", abstract = "This paper deals with the transaction management aspects of the R* distributed database system. It concentrates primarily on the description of the R* commit protocols, Presumed Abort (PA) and Presumed Commit (PC). PA and PC are extensions of the well-known, two-phase (2P) commit protocol. PA is optimized for read-only transactions and a class of multisite update transactions, and PC is optimized for other classes of multisite update transactions. The optimizations result in reduced intersite message traffic and log writes, and, consequently, a better response time. The paper also discusses R*'s approach toward distributed deadlock detection and resolution.", acknowledgement = ack-nhfb, affiliation = "IBM, San Jose, CA, USA", affiliationaddress = "IBM, San Jose, CA, USA", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "commit protocols; concurrency control, RSTAR TODS, algorithms; database systems; deadlock victim selection; design; distributed; optimization; reliability; transaction management", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Deadlocks. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Synchronization. {\bf D.4.5}: Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.7}: Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. 
{\bf H.2.7}: Information Systems, DATABASE MANAGEMENT, Database Administration, Logging and recovery.", } @Article{Bayer:1986:CTR, author = "Rudolf Bayer", title = "Consistency of Transactions and Random Batch", journal = j-TODS, volume = "11", number = "4", pages = "397--404", month = dec, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-4/p397-bayer/p397-bayer.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-4/p397-bayer/; http://www.acm.org/pubs/toc/Abstracts/tods/214287.html", abstract = "A synchronization technique and scheduling strategy is described, which allows us to run a batch process simultaneously with on-line transactions. The batch process and the transactions are serialized in such a way that consistency level 3 is achieved.", acknowledgement = ack-nhfb, affiliation = "Technische Univ Muenchen, West Ger", affiliationaddress = "Technische Univ Muenchen, West Ger", classification = "723; 913", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control, algorithms; consistency of transactions; database systems; design; performance; random batch; scheduling; synchronization", subject = "{\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management. {\bf D.4.7}: Software, OPERATING SYSTEMS, Organization and Design. {\bf E.5}: Data, FILES. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems.", } @Article{ONeil:1986:ETM, author = "Patrick E. 
O'Neil", title = "The {Escrow} Transactional Method", journal = j-TODS, volume = "11", number = "4", pages = "405--430", month = dec, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-4/p405-o_neil/p405-o_neil.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-4/p405-o_neil/; http://www.acm.org/pubs/toc/Abstracts/tods/7265.html", abstract = "A method is presented for permitting record updates by long-lived transactions without forbidding simultaneous access by other users to records modified. Earlier methods presented separately by Gawlick and Reuter are comparable but concentrate on ``hot-spot'' situations, where even short transactions cannot lock frequently accessed fields without causing bottlenecks. The Escrow Method offered here is designed to support nonblocking record updates by transactions that are ``long lived'' and thus require long periods to complete. Recoverability of intermediate results prior to commit thus becomes a design goal, so that updates as of a given time can be guaranteed against memory or media failure while still retaining the prerogative to abort. 
This guarantee basically completes phase one of a two-phase commit, and several advantages result: (1) As with Gawlick's and Reuter's methods, high-concurrency items in the database will not act as a bottleneck; (2) transaction commit of different updates can be performed asynchronously, allowing natural distributed transactions; indeed, distributed transactions in the presence of delayed messages or occasional line disconnection become feasible in a way that we argue will tie up minimal resources for the purpose intended; and (3) it becomes natural to allow for human interaction in the middle of a transaction without loss of concurrent access or any special difficulty for the application programmer. The Escrow Method, like Gawlick's Fast Path and Reuter's Method, requires the database system to be an ``expert'' about the type of transactional updates performed, most commonly updates involving incremental changes to aggregate quantities. However, the Escrow Method is extendable to other types of updates.", acknowledgement = ack-nhfb, affiliation = "Computer Corp of America, Cambridge, MA, USA", affiliationaddress = "Computer Corp of America, Cambridge, MA, USA", annote = "For aggregate values (counts, sum) concurrency control can use soft tolerances and keep them in escrow", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems; design; escrow transactional method; long-lived transactions; multiuser environment, locking quantities, not variables TODS, algorithms; nested transactions; performance; theory; two-phase commit", subject = "{\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Deadlocks. 
{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Escrow. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Hsu:1986:PTP, author = "Meichun Hsu and Arvola Chan", title = "Partitioned Two-Phase Locking", journal = j-TODS, volume = "11", number = "4", pages = "431--446", month = dec, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-4/p431-hsu/p431-hsu.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-4/p431-hsu/; http://www.acm.org/pubs/toc/Abstracts/tods/7477.html", abstract = "In a large integrated database, there often exists an ``information hierarchy,'' where both raw data and derived data are stored and used together. Therefore, among update transactions, there will often be some that perform only read accesses from a certain (i.e., the ``raw'' data) portion of the database and write into another (i.e., the ``derived'' data) portion. A conventional concurrency control algorithm would have treated such transactions as regular update transactions and subjected them to the usual protocols for synchronizing update transactions. In this paper such transactions are examined more closely. The purpose is to devise concurrency control methods that allow the computation of derived information to proceed without interfering with the updating of raw data. \par The first part of the paper presents a proof method for correctness of concurrency control algorithms in a hierarchically decomposed database. 
The proof method provides a framework for understanding the intricacies in dealing with hierarchically decomposed databases. The second part of the paper is an application of the proof method to show the correctness of a two-phase-locking-based algorithm, called partitioned two-phase locking, for hierarchically decomposed databases. This algorithm is a natural extension to the Version Pool method proposed previously in the literature.", acknowledgement = ack-nhfb, affiliation = "Harvard Univ, Cambridge, MA, USA", affiliationaddress = "Harvard Univ, Cambridge, MA, USA", annote = "revisions also for update", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- algorithms; concurrency control; database systems; theory; transaction processing, algorithms; two-phase locking", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design.", } @Article{Luk:1986:EEL, author = "W. S. Luk and Steve Kloster", title = "{ELFS}: {English} Language from {SQL}", journal = j-TODS, volume = "11", number = "4", pages = "447--472", month = dec, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Dec 8 08:54:10 MST 1996", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In this paper we describe a system which, given a query in SQL-like relational database language, will display its meaning in clear, unambiguous natural language. The syntax-driven translation mechanism is independent of the application domain. It has direct applications in designing computer-based SQL tutorial systems and program debugging systems. 
The research results obtained in the paper will also be useful in query optimization and design of a more user-friendly language front-end for casual users.", acknowledgement = ack-nhfb, affiliation = "Simon Fraser Univ, Burnaby, BC, Can", affiliationaddress = "Simon Fraser Univ, Burnaby, BC, Can", annote = "display meaning in natural language is independent of the application domain.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- Program Debugging; database systems; ELFS; Query Languages; relational database language; SQL", } @Article{Sacco:1986:BMR, author = "Giovanni Maria Sacco and Mario Schkolnick", title = "Buffer Management in Relational Database Systems", journal = j-TODS, volume = "11", number = "4", pages = "473--498", month = dec, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-4/p473-sacco/p473-sacco.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-4/p473-sacco/; http://www.acm.org/pubs/toc/Abstracts/tods/7336.html", abstract = "The hot-set model, characterizing the buffer requirements of relational queries, is presented. This model allows the system to determine the optimal buffer space to be allocated to a query; it can also be used by the query optimizer to derive efficient execution plans accounting for the available buffer space, and by a query scheduler to prevent thrashing. The hot-set model is compared with the working-set model. 
A simulation study is presented.", acknowledgement = ack-nhfb, acmcr = "8708-0695", affiliation = "Univ di Torino", affiliationaddress = "Turin, Italy", annote = "The hot-set model provides a more meaningful measure of cost than simple I/O counts.", classification = "723; 913", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "buffer management; database systems; performance; query optimizer, algorithms; query processing; relational; scheduling; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design. {\bf D.4.2}: Software, OPERATING SYSTEMS, Storage Management.", } @Article{Ariav:1986:TOD, author = "Gad Ariav", title = "A Temporally Oriented Data Model", journal = j-TODS, volume = "11", number = "4", pages = "499--527", month = dec, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1986-11-4/p499-ariav/p499-ariav.pdf; http://www.acm.org/pubs/citations/journals/tods/1986-11-4/p499-ariav/; http://www.acm.org/pubs/toc/Abstracts/tods/7350.html", abstract = "The research into time and data models has so far focused on the identification of extensions to the classical relational model that would provide it with ``adequate'' semantic capacity to deal with time. The temporally oriented data model (TODM) presented in this paper is a result of a different approach, namely, it directly operationalizes the pervasive three-dimensional metaphor for time. 
One of the main results is thus the development of the notion of the data cube: a three-dimensional and inherently temporal data construct where time, objects, and attributes are the primary dimensions of stored data. TODM's cube adds historical depth to the tabular notions of data and provides a framework for storing and retrieving data within their temporal context. The basic operations in the model allow the formation of new cubic views from existing ones, or viewing data as one moves up and down in time within cubes.\par This paper introduces TODM, a consistent set of temporally oriented data constructs, operations, and constraints, and then presents TOSQL, a corresponding end-user's SQL-like query syntax. The model is a restricted but consistent superset of the relational model, and the query syntax incorporates temporal notions in a manner that likewise avoids penalizing users who are interested solely in the current view of data (rather than in a temporal perspective). The naturalness of the spatial reference to time and the added semantic capacity of TODM come with a price--the definitions of the cubic constructs and basic operations are relatively cumbersome. As rudimentary as it is, TODM nonetheless provides a comprehensive basis for formulating an external data model for a temporally oriented database.", acknowledgement = ack-nhfb, affiliation = "New York Univ, New York, NY, USA", affiliationaddress = "New York Univ, New York, NY, USA", annote = "at least one timestamp, time of record, plus other temporal --- event stamps.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data cube; database systems; information modeling; languages; relational; temporally oriented data model; theory; TODM, design; TOSQL", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML). {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages.", } @Article{Albano:1986:OSG, author = "Antonio Albano and S. Alfo and Luca Cardelli and Renzo Orsini", title = "An Overview of {SIDEREUS}: a Graphical Database Schema Editor for {Galileo}", journal = j-TODS, volume = "11", number = "??", pages = "568--571", month = "????", year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 10 12:51:20 1996", bibsource = "Distributed/gesturing.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", xxnote = "This paper does not seem to be published in TODS.", } @Article{Durand:1986:FMS, author = "Charles Durand", title = "Forward Multidimensional Search with Applications to Information Retrieval", journal = j-TODS, volume = "??", number = "??", pages = "??--??", month = sep, year = "1986", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 10 12:51:25 1996", bibsource = "Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Submitted.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", xxnote = "This paper does not seem to be published in TODS.", } @Article{Spyratos:1987:PMD, author = "Nicolas Spyratos", title = "The partition model: a deductive database model", journal = j-TODS, volume = "12", number = "1", pages = "1--37", month = mar, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: Institut National de la Recherche en Informatique et Automatique, TR-286, Apr. 1983.", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-1/p1-spyratos/p1-spyratos.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-1/p1-spyratos/; http://www.acm.org/pubs/toc/Abstracts/tods/22718.html", abstract = "We present a new database model in which each attribute is modeled by a family of disjoint subsets of an underlying population of objects. Such a family is called a partitioning, and the set of all partitionings is turned into a lattice by appropriately defining product and sum. A database is seen as a function from a sublattice into the lattice of partitionings. 
The model combines the following features:\par (1) syntactic simplicity (essentially that of the relational model),\par (2) powerful means for the specification of semantic information (in the form of lattice equations), and \par (3) deductive capability (essentially that of set theory).\par The relational model of data and the basic constructs of semantic modeling can be embedded into our model in a simple and straightforward manner.", acknowledgement = ack-nhfb, affiliation = "Univ de Paris-Sud, Orsay, Fr", affiliationaddress = "Univ de Paris-Sud, Orsay, Fr", annote = "Type hierarchies and lattices.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database semantics; database systems; deductive database model; partition model; theory", subject = "{\bf F.3.2}: Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Semantics of Programming Languages, Algebraic approaches to semantics. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models.", } @Article{Wu:1987:ASM, author = "C. T. Wu and Walter A. 
Burkhard", title = "Associative Searching in Multiple Storage Units", journal = j-TODS, volume = "12", number = "1", pages = "38--64", month = mar, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Discusses interpolation hashing, a multidimensional variant of linear hashing.", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-1/p38-wu/p38-wu.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-1/p38-wu/; http://www.acm.org/pubs/toc/Abstracts/tods/12048.html", abstract = "A file maintenance model, called the multiple random access storage units model, is introduced. Storage units can be accessed simultaneously, and the parallel processing of an associative query is achieved by distributing data evenly among the storage units. Maximum parallelism is obtained when data satisfying an associative query are evenly distributed for every possible query. An allocation scheme called $M$-cycle allocation is proposed to maintain large files of data on multiple random access storage units. The allocation scheme provides an efficient and straightforward indexing over multidimensional key spaces and supports the parallel processing of orthogonal range queries. Our analysis shows that $M$-cycle allocation achieves the near-optimum parallelism for processing the orthogonal range queries. Moreover, there is no duplication of records and no increase in insertion\slash deletion cost.", acknowledgement = ack-nhfb, affiliation = "Univ of California, San Diego, CA, USA", affiliationaddress = "Univ of California, San Diego, CA, USA", classification = "723; 903", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "associative searching; data processing --- file organization; database systems; design; file maintenance model; information science --- information retrieval; multiple storage units; performance; random access, algorithms; theory", subject = "{\bf E.5}: Data, FILES. {\bf E.1}: Data, DATA STRUCTURES. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design.", } @Article{Lomet:1987:PEF, author = "David B. Lomet", title = "Partial Expansions for File Organizations with an Index", journal = j-TODS, volume = "12", number = "1", pages = "65--84", month = mar, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-1/p65-lomet/p65-lomet.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-1/p65-lomet/; http://www.acm.org/pubs/toc/Abstracts/tods/12049.html", abstract = "A new way to increase file space in dynamically growing files is introduced in which substantial improvement in file utilization can be achieved. It makes use of partial expansions in which, instead of doubling the space associated with some part of the file, the space grows at a slower rate. Unlike previous versions of partial expansion in which the number of buckets involved in file growth is increased by less than a factor of two, the new method expands file space by increasing bucket size via `elastic buckets'. This permits partial expansions to be used with a wide range of indexed files, including B-trees. 
The results of using partial expansions are analyzed, and the analysis confirmed by a simulation study. The analysis and simulation demonstrate that the file utilization gains are substantial and that fears of excessive insertion cost resulting from more frequent file growth are unfounded.", acknowledgement = ack-nhfb, affiliation = "Wang Inst of Graduate Studies, Tyngboro, MA, USA", affiliationaddress = "Wang Inst of Graduate Studies, Tyngboro, MA, USA", annote = "a way to increase file space with substantial improvement in file utilization elastic buckets come in a number of sizes.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing; database systems; File Organization; FILE UTILIZATION; INSERTION COST; PARTIAL EXPANSIONS", subject = "{\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, Access methods. {\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, File organization. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. 
{\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization.", } @Article{Fedorowicz:1987:DPE, author = "Jane Fedorowicz", title = "Database Performance Evaluation in an Indexed File Environment", journal = j-TODS, volume = "12", number = "1", pages = "85--110", month = mar, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-1/p85-fedorowicz/p85-fedorowicz.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-1/p85-fedorowicz/; http://www.acm.org/pubs/toc/Abstracts/tods/13675.html", abstract = "The use of database systems for managerial decision making often incorporates information-retrieval capabilities with numeric report generation. Of great concern to the user of such a system is the response time associated with issuing a query to the database. This study presents a procedure for estimating response time for one of the most frequently encountered physical storage mechanisms, the indexed file. The model provides a fairly high degree of accuracy, but is simple enough so that the cost of applying the model is not exorbitant. The model incorporates the knowledge that the distribution of access key occurrences is known to follow Zipf's law. It first estimates the access time required to complete the query, which includes the time needed for all input and output transactions, and CPU time used in performing the search. The effects of multiple users on an individual's response time are then assessed using a simple regression estimation technique. 
The two-step procedure allows for the separation of access time from multiuser influences.", acknowledgement = ack-nhfb, affiliation = "Boston Univ, Boston, MA, USA", affiliationaddress = "Boston Univ, Boston, MA, USA", annote = "a procedure for estimating response time; distribution of access key occurrences follow Zipf's law. Early version with Kellogg, J. L. Model provides a fairly high degree of accuracy but is simple. The effects of multiple users are assessed using simple regression estimation.", classification = "723; 912; 922", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing --- File organization; database performance; database systems; indexed file environment; MANAGEMENT --- Information Systems; multiple users, design; Performance; performance; response time; statistical methods --- regression analysis; Zipf's law", subject = "{\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, File organization. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Retrieval models.", } @Article{Ozsoyoglu:1987:NNF, author = "Z. 
Meral {\"O}zsoyo{\u{g}}lu and Li-Yan Yuan", title = "A New Normal Form for Nested Relations", journal = j-TODS, volume = "12", number = "1", pages = "111--136", month = mar, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "886 100", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-1/p111-ozsoyoglu/p111-ozsoyoglu.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-1/p111-ozsoyoglu/; http://www.acm.org/pubs/toc/Abstracts/tods/13676.html", abstract = "We consider nested relations whose schemes are structured as trees, called scheme trees, and introduce a normal form for such relations, called the nested normal form. Given a set of attributes $U$, and a set of multivalued dependencies (MVDs) $M$ over these attributes, we present an algorithm to obtain a nested normal form decomposition of $U$ with respect to $M$. Such a decomposition has several desirable properties, such as explicitly representing a set of full and embedded MVDs implied by $M$, and being a faithful and nonredundant representation of $U$. Moreover, if the given set of MVDs is conflict-free, then the nested normal form decomposition is also dependency-preserving. Finally, we show that if $M$ is conflict-free, then the set of root-to-leaf paths of scheme trees in nested normal form decomposition is precisely the unique 4NF decomposition $ [9, 16] $ of $U$ with respect to $M$.", acknowledgement = ack-nhfb, affiliation = "Case Western Reserve Univ, Cleveland, OH, USA", affiliationaddress = "Case Western Reserve Univ, Cleveland, OH, USA", annote = "non-first normal form.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing; data structures; database systems --- design; decomposition, algorithms; design; multivalued dependency; nested relations; normal form; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms.", } @Article{Christodoulakis:1987:ARP, author = "Stavros Christodoulakis", title = "Analysis of Retrieval Performance for Records and Objects using Optical Disk Technology", journal = j-TODS, volume = "12", number = "2", pages = "137--169", month = jun, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-2/p137-christodoulakis/p137-christodoulakis.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-2/p137-christodoulakis/; http://www.acm.org/pubs/toc/Abstracts/tods/23015.html", abstract = "In this paper we examine the problem of object and record retrieval from optical disks. General objects (such as images, documents, etc.) may be long and their length may have high variance. We assume that all the components of an object are stored consecutively in storage to speed-up retrieval performance. We first present an optical disk model and an optimal schedule for retrieval of records and objects which qualify in a single query on a file stored on an optical disk device. We then provide {\em exact\/} and {\em approximate\/} analytic results for evaluating the retrieval performance for objects from an optical disk. 
The analysis provides some basic analytic tools for studying the performance of various file and database organizations for optical disks. The results involve probability distribution of block accesses, probability distributions of span accesses, and probability distribution of seek times. Record retrieval is an important special case. This analysis differs from similar ones in database environments in the following respects: (1) the large size and large variance of the size of objects; (2) crossing of track boundaries by objects; (3) the capability for span access that optical disks provide (e.g., when the optical assembly is located in a given position, information can be read from a number of consecutive tracks (span) with a small additional cost).", acknowledgement = ack-nhfb, affiliation = "Univ of Waterloo, Waterloo, Ont, Can", affiliationaddress = "Univ of Waterloo, Waterloo, Ont, Can", classification = "723; 741; 903", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Measurement; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data storage, optical --- storage devices; information retrieval; information science; optical disk technology, measurement; performance; retrieval performance; theory", subject = "{\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques. 
{\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, File organization.", } @Article{Herlihy:1987:DQA, author = "Maurice Herlihy", title = "Dynamic Quorum Adjustment for Partitioned Data", journal = j-TODS, volume = "12", number = "2", pages = "170--194", month = jun, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-2/p170-herlihy/p170-herlihy.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-2/p170-herlihy/; http://www.acm.org/pubs/toc/Abstracts/tods/22953.html", abstract = "A partition occurs when functioning sites in a distributed system are unable to communicate. This paper introduces a new method for managing replicated data objects in the presence of partitions. Each operation provided by a replicated object has a set of quorums, which are sets of sites whose cooperation suffices to execute the operation. The method permits an object's quorums to be adjusted dynamically in response to failures and recoveries. A transaction that is unable to progress using one set of quorums may switch to another, more favorable set, and transactions in different partitions may progress using different sets. 
This method has three novel aspects: (1) it supports a wider range of quorums than earlier proposals, (2) it scales up effectively to large systems because quorum adjustments do not require global reconfiguration, and (3) it systematically exploits the semantics of typed objects to support more flexible quorum adjustment.", acknowledgement = ack-nhfb, affiliation = "Carnegie-Mellon Univ, Pittsburgh, PA, USA", affiliationaddress = "Carnegie-Mellon Univ, Pittsburgh, PA, USA", annote = "Each operation provided by a replicated object has a set of quorums, sites whose cooperation suffices to execute the operation.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer systems, digital --- distributed; database systems; distributed; dynamic quorum adjustment; languages; partitioned data, algorithms; reliability", subject = "{\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Abstract data types. {\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Data types and structures. {\bf D.4.5}: Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. 
{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Ellis:1987:CLH, author = "Carla Schlatter Ellis", title = "Concurrency in Linear Hashing", journal = j-TODS, volume = "12", number = "2", pages = "195--217", month = jun, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in \cite{ACM:1985:PFA}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-2/p195-ellis/p195-ellis.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-2/p195-ellis/; http://www.acm.org/pubs/toc/Abstracts/tods/22954.html", abstract = "Concurrent access to complex shared data structures, particularly structures useful as database indices, has long been of interest in the database community. In dynamic databases, tree structures such as B-trees have been used as indices because of their ability to handle growth; whereas hashing has been used for fast access in relatively static databases. Recently, a number of techniques for dynamic hashing have appeared. They address the major deficiency of traditional hashing when applied to databases that experience significant change in the amount of data being stored. This paper presents a solution that allows concurrency in one of these dynamic hashing data structures, namely linear hash files. The solution is based on locking protocols and minor modifications in the data structures.", acknowledgement = ack-nhfb, affiliation = "Univ of Rochester, USA", affiliationaddress = "Univ of Rochester, USA", annote = "Searching can proceed in parallel with splits. Also discusses distributed access.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "complex shared data structures, algorithms; concurrent access; data processing; Data Structures; database systems; linear hashing", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Multiprocessing/multiprogramming. {\bf E.1}: Data, DATA STRUCTURES. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design. {\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization.", } @Article{Valduriez:1987:JI, author = "Patrick Valduriez", title = "Join Indices", journal = j-TODS, volume = "12", number = "2", pages = "218--246", month = jun, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/database.bib; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Compares join indices with inverted indices, clustered indices, B+ trees, linked lists, and hybrid hash techniques.", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-2/p218-valduriez/p218-valduriez.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-2/p218-valduriez/; http://www.acm.org/pubs/toc/Abstracts/tods/22955.html", abstract = "In new application areas of relational database systems, such as artificial intelligence, the join operator is used more extensively than in conventional applications. In this paper, we propose a simple data structure, called a join index, for improving the performance of joins in the context of complex queries. For most of the joins, updates to join indices incur very little overhead. 
Some properties of a join index are (i) its efficient use of memory and adaptiveness to parallel execution, (ii) its compatibility with other operations (including select and union), (iii) its support for abstract data type join predicates, (iv) its support for multirelation clustering, and (v) its use in representing directed graphs and in evaluating recursive queries. Finally, the analysis of the join algorithm using join indices shows its excellent performance.", acknowledgement = ack-nhfb, affiliation = "Microelectronics \& Computer Technology Corp, Austin, TX, USA", affiliationaddress = "Microelectronics \& Computer Technology Corp, Austin, TX, USA", annote = "arrays of combined indices are maintained to precompute joins among tuples. The technique is very similar to that implemented as ADABAS correlators.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "abstract data types; computer programming --- Algorithms; data processing --- Data Structures; database systems; design; join algorithm, including semi-join join index with rid list from selection index TODS, algorithms; JOIN index; multirelation clustering; performance; Relational", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf E.1}: Data, DATA STRUCTURES, Trees. {\bf E.5}: Data, FILES, Organization/structure. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. 
{\bf H.3.1}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Content Analysis and Indexing, Indexing methods.", } @Article{Snodgrass:1987:TQL, author = "Richard Snodgrass", title = "The {Temporal Query Language TQuel}", journal = j-TODS, volume = "12", number = "2", pages = "247--298", month = jun, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-2/p247-snodgrass/p247-snodgrass.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-2/p247-snodgrass/; http://www.acm.org/pubs/toc/Abstracts/tods/22956.html", abstract = "Recently, attention has been focused on {\em temporal databases}, representing an enterprise over time. We have developed a new language, {\em TQuel}, to query a temporal database. TQuel was designed to be a minimal extension, both syntactically and semantically, of Quel, the query language in the Ingres relational database management system. This paper discusses the language informally, then provides a tuple relational calculus semantics for the TQuel statements that differ from their Quel counterparts, including the modification statements. The three additional temporal constructs defined in TQuel are shown to be direct semantic analogues of Quel's where clause and target list. We also discuss reducibility of the semantics to Quel's semantics when applied to a static database. TQuel is compared with ten other query languages supporting time.", acmcrnumber = "8712-1006", affiliation = "Univ of North Carolina, Chapel Hill, NC, USA", affiliationaddress = "Univ of North Carolina, Chapel Hill, NC, USA", annote = "Describes extensions to Quel to handle temporal queries. 
Three kinds of temporal information are handled: `Transaction time', when information was stored in the database, `valid time' when the stored info models reality, and `user-defined time' explicitly stored by user in the database.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming languages; database systems; relational calculus; temporal databases; temporal query language; theory; TQUEL; tuple calculus, languages", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.7}: Information Systems, DATABASE MANAGEMENT, Database Administration, Logging and recovery. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, TQUEL.", } @Article{Wong:1987:MIR, author = "S. K. M. Wong and W. Ziarko and V. V. Raghavan and P. C. N. 
Wong", title = "On Modeling of Information Retrieval Concepts in Vector Spaces", journal = j-TODS, volume = "12", number = "2", pages = "299--321", month = jun, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-2/p299-wong/p299-wong.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-2/p299-wong/; http://www.acm.org/pubs/toc/Abstracts/tods/22957.html", abstract = "The Vector Space Model (VSM) has been adopted in information retrieval as a means of coping with inexact representation of documents and queries, and the resulting difficulties in determining the relevance of a document relative to a given query. The major problem in employing this approach is that the explicit representation of term vectors is not known a priori. Consequently, earlier researchers made the assumption that the vectors corresponding to terms are pairwise orthogonal. Such an assumption is clearly unrealistic. Although attempts have been made to compensate for this assumption by some separate, corrective steps, such methods are ad hoc and, in most cases, formally inconsistent.\par In this paper, a generalization of the VSM, called the GVSM, is advanced. The developments provide a solution not only for the computation of a measure of similarity (correlation) between terms, but also for the incorporation of these similarities into the retrieval process.\par The major strength of the GVSM derives from the fact that it is theoretically sound and elegant. Furthermore, experimental evaluation of the model on several test collections indicates that the performance is better than that of the VSM. 
Experiments have been performed on some variations of the GVSM, and all these results have also been compared to those of the VSM, based on inverse document frequency weighting. These results and some ideas for the efficient implementation of the GVSM are discussed.", acknowledgement = ack-nhfb, affiliation = "Univ of Regina, Regina, Sask, Can", affiliationaddress = "Univ of Regina, Regina, Sask, Can", annote = "The space for both documents and queries is an n-dimensional vector space. In GVSM, terms are not assumed to be linearly independent; measure of independence is based on the number of common documents.", classification = "723; 903", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Experimentation; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "document representation; experimentation; generalized vector space; Information Retrieval; information retrieval systems --- Mathematical Models; information science; languages; query representation, design; theory", subject = "{\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Retrieval models. {\bf H.3.1}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Content Analysis and Indexing, Thesauruses. {\bf H.3.1}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Content Analysis and Indexing, Indexing methods. 
{\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval.", } @Article{Rybinski:1987:FOL, author = "Henryk Rybi{\'n}ski", title = "On First-Order-Logic Databases", journal = j-TODS, volume = "12", number = "3", pages = "325--349", month = sep, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15 (03B70)", MRnumber = "88j:68033", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-3/p325-rybinski/p325-rybinski.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-3/p325-rybinski/; http://www.acm.org/pubs/toc/Abstracts/tods/27630.html", abstract = "The use of first-order logic as database logic is shown to be powerful enough for formalizing and implementing not only relational but also hierarchical and network-type databases. It enables one to treat all the types of databases in a uniform manner. This paper focuses on the database language for heterogeneous databases. The language is shown to be general enough to specify constraints for a particular type of database, so that a specification of database type can be ``translated'' to the specification given in the database language, creating a ``logical environment'' for different views that can be defined by users. Owing to the fact that any database schema is seen as a first-order theory expressed by a finite set of sentences, the problems concerned with completeness and compactness of the database logic discussed by Jacobs (``On Database Logic,'' {\em J. ACM 29\/}, 2 (Apr. 1982), 310-332) are avoided.", acknowledgement = ack-nhfb, annote = "Successor of Jacobs's work. 
Language Ld can specify constraints for any database type, which can then be `translated' to a particular database domain.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer metatheory --- formal logic; database logic; design; first-order logic; hierarchical databases; languages; network databases; relational databases; theory, database systems", review = "ACM Computing Reviews, Jan 1989", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML). {\bf F.4.1}: Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Mathematical Logic. {\bf I.2.4}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods, Predicate logic.", } @Article{Stonebraker:1987:EDS, author = "Michael Stonebraker and Jeff Anton and Eric Hanson", title = "Extending a Database System with Procedures", journal = j-TODS, volume = "12", number = "3", pages = "350--376", month = sep, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: UCB/ERL memo M85/59, 1985.", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-3/p350-stonebraker/p350-stonebraker.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-3/p350-stonebraker/; http://www.acm.org/pubs/toc/Abstracts/tods/27631.html", abstract = "This paper suggests that more powerful database systems (DBMS) can be built by supporting database procedures as full-fledged database objects. 
In particular, allowing fields of a database to be a collection of queries in the query language of the system is shown to allow the natural expression of complex data relationships. Moreover, many of the features present in object-oriented systems and semantic data models can be supported by this facility. \par In order to implement this construct, extensions to a typical relational query language must be made, and considerable work on the execution engine of the underlying DBMS must be accomplished. This paper reports on the extensions for one particular query language and data manager and then gives performance figures for a prototype implementation. Even though the performance of the prototype is competitive with that of a conventional system, suggestions for improvement are presented.", acknowledgement = ack-nhfb, annote = "Stored procedures follow DBTG suggestions from 1971. The INGRES+ results were `competitive'.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database procedures, design; database systems; object-oriented systems; relational query language; semantic data models", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Ozsoyoglu:1987:RMM, author = "Z. 
Meral {\"O}zsoyo{\u{g}}lu and Li-Yan Yuan", title = "Reduced {MVDs} and Minimal Covers", journal = j-TODS, volume = "12", number = "3", pages = "377--394", month = sep, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "88h:68017", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1987-12-3/p377-ozsoyoolu/; http://www.acm.org/pubs/toc/Abstracts/tods/214286.html", abstract = "Multivalued dependencies (MVDs) are data dependencies that appear frequently in the ``real world'' and play an important role in designing relational database schemes. Given a set of MVDs to constrain a database scheme, it is desirable to obtain an equivalent set of MVDs that do not have any redundancies. In this paper we define such a set of MVDs, called reduced MVDs, and present an algorithm to obtain reduced MVDs. We also define a minimal cover of a set of MVDs, which is a set of reduced MVDs, and give an efficient method to find such a minimal cover. The significance and properties of reduced MVDs are also discussed in the context of database design (e.g., 4NF decomposition) and conflict-free MVDs.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; computer programming --- algorithms; database schema design; database systems; design; minimal covers; multivalued dependencies; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. 
{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms.", } @Article{Faloutsos:1987:OSE, author = "Christos Faloutsos and Stavros Christodoulakis", title = "Optimal Signature Extraction and Information Loss", journal = j-TODS, volume = "12", number = "3", pages = "395--428", month = sep, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-3/p395-faloutsos/p395-faloutsos.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-3/p395-faloutsos/; http://www.acm.org/pubs/toc/Abstracts/tods/214285.html", abstract = "Signature files seem to be a promising access method for text and attributes. According to this method, the documents (or records) are stored sequentially in one file (``text file''), while abstractions of the documents (``signatures'') are stored sequentially in another file (``signature file''). In order to resolve a query, the signature file is scanned first, and many nonqualifying documents are immediately rejected. We develop a framework that includes primary key hashing, multiattribute hashing, and signature files. Our effort is to find the optimal signature extraction method. \par The main contribution of this paper is that we present optimal and efficient suboptimal algorithms for assigning words to signatures in several environments. Another contribution is that we use information theory, and study the relationship of the false drop probability $ F_d $ and the information that is lost during signature extraction. 
We give tight lower bounds on the achievable $ F_d $ and show that a simple relationship holds between the two quantities in the case of optimal signature extraction with uniform occurrence and query frequencies. We examine hashing as a method to map words to signatures (instead of the optimal way), and show that the same relationship holds between $ F_d $ and {\em loss}, indicating that an invariant may exist between these two quantities for every signature extraction method.", acknowledgement = ack-nhfb, annote = "superimposed coding", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing; database systems; information loss; optimal signature extraction; performance; signature files, design", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf E.4}: Data, CODING AND INFORMATION THEORY, Data compaction and compression. {\bf E.5}: Data, FILES. 
{\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization.", } @Article{Ibaraki:1987:SC, author = "Toshihide Ibaraki and Tiko Kameda and Toshimi Minoura", title = "Serializability with Constraints", journal = j-TODS, volume = "12", number = "3", pages = "429--452", month = sep, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "909 139", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-3/p429-ibaraki/p429-ibaraki.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-3/p429-ibaraki/; http://www.acm.org/pubs/toc/Abstracts/tods/214284.html", abstract = "This paper deals with the serializability theory for single-version and multiversion database systems. We first introduce the concept of {\em disjoint-interval topological sort\/} ({\em DITS}, for short) of an arc-labeled directed acyclic graph. It is shown that a history is serializable if and only if its {\em transaction IO graph\/} has a DITS. We then define several subclasses of serializable histories, based on the constraints imposed by write-write, write-read, read-write, or read-read conflicts, and investigate inclusion relationships among them. In terms of DITS, we give a sufficient condition for a class of serializable histories to be polynomially recognizable, which is then used to show that a new class of histories, named WRW, can be recognized in polynomial time. We also present NP-completeness results for the problem of testing membership in some other classes. \par In the second half of this paper, we extend these results to multiversion database systems. 
The inclusion relationships among multiversion classes defined by constraints, such as write-write and write-read, are investigated. One such class coincides with class DMVSR, introduced by Papadimitriou and Kanellakis, and we give a simple characterization of this class. It is shown that for most constraints, multiversion classes properly contain the corresponding single-version classes. Complexity results for the membership testing are also discussed.", acknowledgement = ack-nhfb, annote = "classification and properties of conflict graphs, with and without versions.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer systems programming --- sorting; concurrency control; database systems; disjoint-interval topological sort, algorithms; serializability; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Wolfson:1987:OLC, author = "Ouri Wolfson", title = "The Overhead of Locking (and Commit) Protocols in Distributed Databases", journal = j-TODS, volume = "12", number = "3", pages = "453--471", month = sep, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-3/p453-wolfson/p453-wolfson.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-3/p453-wolfson/; http://www.acm.org/pubs/toc/Abstracts/tods/28053.html", abstract = "The main purpose of a locking protocol is to ensure correct interleaving of actions executed by concurrent transactions. 
The locking protocol consists of a set of rules dictating how accessed entities should be locked and unlocked. As a result of obeying the rules, transactions in a distributed database incur an overhead. We propose three measures of evaluating this overhead, each most suitable to a different type of underlying communication network. Then, using a graph theoretic model, we analyze and compare three protocols according to each measure: two-phase locking, two-phase locking with a fixed order imposed on the database entities (ensuring deadlock freedom), and the tree protocol. In practice, a transaction also executes the two-phase commit protocol in order to guarantee atomicity. Therefore, the combined overhead of each locking protocol and the two-phase commit protocol is also determined.", acknowledgement = ack-nhfb, classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Measurement; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "commit protocols; computer networks --- protocols; concurrency control, algorithms; database systems; locking protocols; measurement; message passing; performance; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf D.2.8}: Software, SOFTWARE ENGINEERING, Metrics, Performance measures. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Su:1987:CBD, author = "Stanley Y. W. Su and Jozo Dujmovic and D. S. Batory and S. B. 
Navathe and Richard Elnicki", title = "A Cost-Benefit Decision Model: Analysis, Comparison, and Selection of Data Management Systems", journal = j-TODS, volume = "12", number = "3", pages = "472--520", month = sep, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-3/p472-su/p472-su.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-3/p472-su/; http://www.acm.org/pubs/toc/Abstracts/tods/33403.html", abstract = "This paper describes a general cost-benefit decision model that is applicable to the evaluation, comparison, and selection of alternative products with a multiplicity of features, such as complex computer systems. The application of this model is explained and illustrated using the selection of data management systems as an example.\par The model has the following features: (1) it is mathematically based on an extended continuous logic and a theory of complex criteria; (2) the decision-making procedure is very general yet systematic, well-structured, and quantitative; (3) the technique is based on a comprehensive cost analysis and an elaborate analysis of benefits expressed in terms of the decision maker's preferences. The decision methodology, when applied to the problem of selecting a data management system, takes into consideration the life cycle of a DMS and the objectives and goals for the new systems under evaluation. It allows the cost and preference analyses to be carried out separately using two different models. 
The model for preference analysis makes use of comprehensive performance (or preference) parameters and allows what we call a ``logic scoring of preferences'' using continuous values between zero and one, to express the degree with which candidate systems satisfy stated requirements. It aggregates preference parameters based on their relative weights and logical relationships to compute a global performance (preference) score for each system. The cost model incorporates an aggregation of costs which may be estimated over different time horizons and discounted at appropriate discount rates. A procedure to establish an overall ranking of alternative systems based on their global preference scores and global costs is also discussed.", acknowledgement = ack-nhfb, annote = "The LPS model results from an NBS study. Preference scoring, facility costs.", classification = "723; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "cost-benefit decision model; data management systems, SYWSU TODS; database systems; mathematical models", subject = "{\bf H.2.7}: Information Systems, DATABASE MANAGEMENT, Database Administration. {\bf K.6.3}: Computing Milieux, MANAGEMENT OF COMPUTING AND INFORMATION SYSTEMS, Software Management. {\bf D.2.9}: Software, SOFTWARE ENGINEERING, Management, Cost estimation.", } @Article{Keller:1987:CBS, author = "Arthur M. 
Keller", title = "Comment on {Bancilhon} and {Spyratos}' {``Update semantics and relational views''}", journal = j-TODS, volume = "12", number = "3", pages = "521--523", month = sep, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See \cite{Bancilhon:1981:USR}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-3/p521-keller/p521-keller.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-3/p521-keller/; http://www.acm.org/pubs/toc/Abstracts/tods/214296.html", acknowledgement = ack-nhfb, annote = "a small relaxation of constraints increases the feasibility of view update", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; theory", subject = "{\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Retrieval models. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models.", } @Article{Abiteboul:1987:IFS, author = "Serge Abiteboul and Richard Hull", title = "{IFO}: a Formal Semantic Database Model", journal = j-TODS, volume = "12", number = "4", pages = "525--565", month = dec, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "88i:68017", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", note = "Also published in/as: ACM SIGACT-SIGMOD Symp. on Principles of Database Systems, 1984 (short version). 
Also published in/as: USC CSD, TR-84-304, Apr. 1984.", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-4/p525-abiteboul/p525-abiteboul.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-4/p525-abiteboul/; http://www.acm.org/pubs/toc/Abstracts/tods/32205.html", abstract = "A new, formally defined database model is introduced that combines fundamental principles of ``semantic'' database modeling in a coherent fashion. Using a graph-based formalism, the IFO model provides mechanisms for representing structured objects, and functional and ISA relationships between them. A number of fundamental results concerning semantic data modeling are obtained in the context of the IFO model. Notably, the types of object structure that can arise as a result of multiple uses of ISA relationships and object construction are described. Also, a natural, formal definition of update propagation is given, and it is shown that (under certain conditions) a correct update always exists.", acknowledgement = ack-nhfb, annote = "A graph-based formal semantic database model. Well written. The model appears to be well founded.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; theory; verification", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf I.2.4}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods, Semantic networks.", } @Article{Ozsoyoglu:1987:ERA, author = "G. {\"O}zsoyo{\u{g}}lu and Z. M. {\"O}zsoyo{\u{g}}lu and V. 
Matos", title = "Extending Relational Algebra and Relational Calculus with Set-Valued Attributes and Aggregate Functions", journal = j-TODS, volume = "12", number = "4", pages = "566--592", month = dec, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "920 253", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-4/p566-ozsoyoglu/p566-ozsoyoglu.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-4/p566-ozsoyoglu/; http://www.acm.org/pubs/toc/Abstracts/tods/32219.html", abstract = "In commercial network database management systems, set-valued fields and aggregate functions are commonly supported. However, the relational database model, as defined by Codd, does not include set-valued attributes or aggregate functions. Recently, Klug extended the relational model by incorporating aggregate functions and by defining relational algebra and calculus languages.\par In this paper, relational algebra and relational calculus database query languages (as defined by Klug) are extended to manipulate set-valued attributes and to utilize aggregate functions. The expressive power of the extended languages is shown to be equivalent. We extend the relational algebra with three new operators, namely, pack, unpack, and aggregation-by-template. The extended languages form a theoretical framework for statistical database query languages.", acknowledgement = ack-nhfb, annote = "Nested relations.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; languages; theory; verification; {\"O}zsoyo{\u{g}}lu Nested NF2 TODS", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages.", } @Article{Palley:1987:URM, author = "Michael A. Palley and Jeffrey S. Simonoff", title = "The Use of Regression Methodology for the Compromise of Confidential Information in Statistical Databases", journal = j-TODS, volume = "12", number = "4", pages = "593--608", month = dec, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-4/p593-palley/p593-palley.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-4/p593-palley/; http://www.acm.org/pubs/toc/Abstracts/tods/42174.html", abstract = "A regression methodology based technique can be used to compromise confidentiality in a statistical database. This holds true even when the DBMS prevents application of regression methodology to the database. Existing inference controls, including cell restriction, perturbation, and table restriction approaches, are shown to be generally ineffective against this compromise technique. The effect of incomplete supplemental knowledge on the regression methodology based compromise technique is examined. Finally, some potential complicators of this disclosure scheme are introduced.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Legal Aspects; Management; Security", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "legalaspects; management; security, security TODS", subject = "{\bf H.2.7}: Information Systems, DATABASE MANAGEMENT, Database Administration. {\bf H.2.8}: Information Systems, DATABASE MANAGEMENT, Database applications. {\bf K.4.1}: Computing Milieux, COMPUTERS AND SOCIETY, Public Policy Issues, Privacy. {\bf K.6.m}: Computing Milieux, MANAGEMENT OF COMPUTING AND INFORMATION SYSTEMS, Miscellaneous, Security*. {\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf G.3}: Mathematics of Computing, PROBABILITY AND STATISTICS, Statistical computing.", } @Article{Agrawal:1987:CCP, author = "Rakesh Agrawal and Michael J. Carey and Miron Livny", title = "Concurrency Control Performance Modeling: Alternatives and Implications", journal = j-TODS, volume = "12", number = "4", pages = "609--654", month = dec, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/real.time.bib", note = "Also published in/as: ACM SIGMOD Conf. on the Management of Data, 1985.", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-4/p609-agrawal/p609-agrawal.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-4/p609-agrawal/; http://www.acm.org/pubs/toc/Abstracts/tods/32220.html", abstract = "A number of recent studies have examined the performance of concurrency control algorithms for database management systems. The results reported to date, rather than being definitive, have tended to be contradictory. 
In this paper, rather than presenting ``yet another algorithm performance study,'' we critically investigate the assumptions made in the models used in past studies and their implications. We employ a fairly complete model of a database environment for studying the relative performance of three different approaches to the concurrency control problem under a variety of modeling assumptions. The three approaches studied represent different extremes in how transaction conflicts are dealt with, and the assumptions addressed pertain to the nature of the database system's resources, how transaction restarts are modeled, and the amount of information available to the concurrency control algorithm about transactions' reference strings. We show that differences in the underlying assumptions explain the seemingly contradictory performance results. We also address the question of how realistic the various assumptions are for actual database systems.", acknowledgement = ack-nhfb, annote = "This paper is unique in that it studies the implications of fundamental assumptions regarding system resources, transactions restarts, and predeclarations of writes. Essential reading.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "performance", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf D.4.8}: Software, OPERATING SYSTEMS, Performance, Simulation. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf D.4.2}: Software, OPERATING SYSTEMS, Storage Management.", } @Article{Sacks-Davis:1987:MAM, author = "R. Sacks-Davis and A. Kent and K. 
Ramamohanarao", title = "Multikey Access Methods Based on Superimposed Coding Techniques", journal = j-TODS, volume = "12", number = "4", pages = "655--696", month = dec, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1987-12-4/p655-sacks-davis/p655-sacks-davis.pdf; http://www.acm.org/pubs/citations/journals/tods/1987-12-4/p655-sacks-davis/; http://www.acm.org/pubs/toc/Abstracts/tods/32222.html", abstract = "Both single-level and two-level indexed descriptor schemes for multikey retrieval are presented and compared. The descriptors are formed using superimposed coding techniques and stored using a bit-inversion technique. A fast-batch insertion algorithm for which the cost of forming the bit-inverted file is less than one disk access per record is presented. For large data files, it is shown that the two-level implementation is generally more efficient for queries with a small number of matching records. For queries that specify two or more values, there is a potential problem with the two-level implementation in that costs may accrue when blocks of records match the query but individual records within these blocks do not. One approach to overcoming this problem is to set bits in the descriptors based on pairs of indexed terms. This approach is presented and analyzed.", acknowledgement = ack-nhfb, annote = "Expressions for the cost of a two-level and one-level scheme.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "descriptors; Design; hashing; partial match retrieval; performance; record signatures; superimposed coding, design", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization. {\bf H.4.1}: Information Systems, INFORMATION SYSTEMS APPLICATIONS, Office Automation. {\bf I.7.m}: Computing Methodologies, TEXT PROCESSING, Miscellaneous.", } @Article{Elhardt:1987:SQO, author = "K. Elhardt", title = "Support for Query Optimization by Optimal Predicate Splitting", journal = j-TODS, volume = "??", number = "??", pages = "??--??", month = mar, year = "1987", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibsource = "Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Accepted.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", xxnote = "This paper does not seem to be published in TODS.", } @Article{Vianu:1988:DFO, author = "Victor Vianu", title = "A Dynamic Framework for Object Projection Views", journal = j-TODS, volume = "13", number = "1", pages = "1--22", month = mar, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "933 215", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-1/p1-vianu/p1-vianu.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-1/p1-vianu/; http://www.acm.org/pubs/toc/Abstracts/tods/42202.html", abstract = "User views in a relational database obtained through a single projection ('projection views') are considered in a new framework. Specifically, such views, where each tuple in the view represents an object ('object-projection views'), are studied using the dynamic relational model, which captures the evolution of the database through consecutive updates. Attribute sets that yield object-projection views are characterized using the static and dynamic functional dependencies satisfied by the database. Object-projection views are then described using the static and dynamic functional dependencies `inherited' from the original database. Finally, the impact of dynamic constraints on the view update problem is studied in a limited context. 
This paper demonstrates that new, useful information about views can be obtained by looking at the evolution of the database as captured by the dynamic relational model.", acknowledgement = ack-nhfb, affiliation = "Univ of California, San Diego, CA, USA", affiliationaddress = "Univ of California, San Diego, CA, USA", annote = "Assumes simplest translation to the underlying database. Very restrictive unirelational database FDs and DFDs, no exclusions or deletions. Technically very good. Rejecting update object projections.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Management; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database schema, design; database systems; dynamic constraints; functional dependency; languages; management; object projection views; relational; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data description languages (DDL).", } @Article{Sellis:1988:MQO, author = "Timos K. 
Sellis", title = "Multiple-Query Optimization", journal = j-TODS, volume = "13", number = "1", pages = "23--52", month = mar, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/database.bib; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-1/p23-sellis/p23-sellis.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-1/p23-sellis/; http://www.acm.org/pubs/toc/Abstracts/tods/42203.html", abstract = "Some recently proposed extensions to relational database systems, as well as to deductive database systems, require support for multiple-query processing. For example, in a database system enhanced with inference capabilities, a simple query involving a rule with multiple definitions may expand to more than one actual query that has to be run over the database. It is an interesting problem then to come up with algorithms that process these queries together instead of one query at a time. The main motivation for performing such an interquery optimization lies in the fact that queries may share common data. We examine the problem of multiple-query optimization in this paper. The first major contribution of the paper is a systematic look at the problem, along with the presentation and analysis of algorithms that can be used for multiple-query optimization. The second contribution lies in the presentation of experimental results. Our results show that using multiple-query processing algorithms may reduce execution cost considerably.", acknowledgement = ack-nhfb, affiliation = "Univ of California, Berkeley, CA, USA", affiliationaddress = "Univ of California, Berkeley, CA, USA", annote = "Two architectures: Interleaving the results of locally optimal access plans or a global optimizer. 
Experimental results show a decrease of 20--50 percent in I/O and CPU time. ---Sava-Segal.", classification = "723; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "common access paths; computer programming --- algorithms; database systems; deductive databases; heuristic methods, global query TODS, algorithms; multiple-query optimization; optimization; performance; Relational", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf I.2.8}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Problem Solving, Control Methods, and Search, Heuristic methods. {\bf H.3.4}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Systems and Software.", } @Article{Shasha:1988:CSS, author = "Dennis Shasha and Nathan Goodman", title = "Concurrent Search Structure Algorithms", journal = j-TODS, volume = "13", number = "1", pages = "53--90", month = mar, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-1/p53-shasha/p53-shasha.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-1/p53-shasha/; http://www.acm.org/pubs/toc/Abstracts/tods/42204.html", abstract = "A dictionary is an abstract data type supporting the actions member, insert, and delete. A search structure is a data structure used to implement a dictionary. Examples include B trees, hash structures, and unordered lists. 
Concurrent algorithms on search structures can achieve more parallelism than standard concurrency control methods would suggest, by exploiting the fact that many different search structure states represent one dictionary state. We present a framework for verifying such algorithms and for inventing new ones. We give several examples, one of which exploits the structure of Banyan family interconnection networks. We also discuss the interaction between concurrency control and recovery as applied to search structures.", acknowledgement = ack-nhfb, affiliation = "New York Univ, New York, NY, USA", affiliationaddress = "New York Univ, New York, NY, USA", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "abstract data type; Banyan interconnection networks; computer programming --- algorithms; concurrent algorithms, algorithms; data processing; Data Structures; design; dictionary; performance; search structure", subject = "{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Finkelstein:1988:PDD, author = "S. J. Finkelstein and M. Schkolnick and P. 
Tiberio", title = "Physical Database Design for Relational Databases", journal = j-TODS, volume = "13", number = "1", pages = "91--128", month = mar, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", note = "Also published in/as: IBM Research Report No. RJ5034, Nov. 1986, preprint IBM Aug. 1985.", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-1/p91-finkelstein/p91-finkelstein.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-1/p91-finkelstein/; http://www.acm.org/pubs/toc/Abstracts/tods/42205.html", abstract = "This paper describes the concepts used in the implementation of DBDSGN, an experimental physical design tool for relational databases developed at the IBM San Jose Research Laboratory. Given a workload for System R (consisting of a set of SQL statements and their execution frequencies), DBDSGN suggests physical configurations for efficient performance. Each configuration consists of a set of indices and an ordering for each table. Workload statements are evaluated only for atomic configurations of indices, which have only one index per table. Costs for any configuration can be obtained from those of the atomic configurations. DBDSGN uses information supplied by the System R optimizer both to determine which columns might be worth indexing and to obtain estimates of the cost of executing statements in different configurations. The tool finds efficient solutions to the index-selection problem; if we assume the cost estimates supplied by the optimizer are the actual execution costs, it finds the optimal solution. Optionally, heuristics can be used to reduce execution time. 
The approach taken by DBDSGN in solving the index-selection problem for multiple-table statements significantly reduces the complexity of the problem. DBDSGN's principles were used in the Relational Design Tool (RDT), an IBM product based on DBDSGN, which performs design for SQL/DS, a relational system based on System R. System R actually uses DBDSGN's suggested solutions as the tool expects because cost estimates and other necessary information can be obtained from System R using a new SQL statement, the EXPLAIN statement. This illustrates how a system can export a model of its internal assumptions and behavior so that other systems (such as tools) can share this model.", acknowledgement = ack-nhfb, affiliation = "IBM", affiliationaddress = "IBM", annote = "DBDSGN led to IBM RDT; input are relational tables and a set of queries expected to be run; produces specifications of indexes, clustered; it uses the actual DBMS (SQL/DS) optimizer.", classification = "723; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Experimentation; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "DBDSGN; design; experimentation; IBM DBDSGN tool TODS, database systems; optimization; performance; physical database design; relational; System R, algorithms; theory", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf D.4.8}: Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf E.1}: Data, DATA STRUCTURES, Tables.", } @Article{Raju:1988:FFD, author = "K. V. S. V. N. Raju and Arun K. 
Majumdar", title = "Fuzzy Functional Dependencies and Lossless Join Decomposition of Fuzzy Relational Database Systems", journal = j-TODS, volume = "13", number = "2", pages = "129--166", month = jun, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-2/p129-raju/p129-raju.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-2/p129-raju/; http://www.acm.org/pubs/toc/Abstracts/tods/42344.html", abstract = "This paper deals with the application of fuzzy logic in a relational database environment with the objective of capturing more meaning of the data. It is shown that with suitable interpretations for the fuzzy membership functions, a fuzzy relational data model can be used to represent ambiguities in data values as well as impreciseness in the association among them. Relational operators for fuzzy relations have been studied, and applicability of fuzzy logic in capturing integrity constraints has been investigated. By introducing a fuzzy resemblance measure EQUAL for comparing domain values, the definition of classical functional dependency has been generalized to fuzzy functional dependency (ffd). The implication problem of ffds has been examined and a set of sound and complete inference axioms has been proposed. Next, the problem of lossless join decomposition of fuzzy relations for a given set of fuzzy functional dependencies is investigated. 
It is proved that with a suitable restriction on EQUAL, the design theory of a classical relational database with functional dependencies can be extended to fuzzy relations satisfying fuzzy functional dependencies.", acknowledgement = ack-nhfb, affiliation = "Andhra Univ, Visakhapatnam, India", affiliationaddress = "Andhra Univ, Visakhapatnam, India", annote = "Extend Armstrong's axioms to fuzzy domains. The concept works out theoretically, but may not necessarily agree with the intuition of the database user. This paper is highly theoretical and notationally intimidating. Yet worth perusing.", classification = "723; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems; fuzzy functional dependencies; fuzzy relational database systems, design; lossless join decomposition; mathematical techniques --- fuzzy sets; relational; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design. {\bf E.5}: Data, FILES. 
{\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Uncertainty, ``fuzzy,'' and probabilistic reasoning.", } @Article{Winslett:1988:MBA, author = "Marianne Winslett", title = "A Model-Based Approach to Updating Databases with Incomplete Information", journal = j-TODS, volume = "13", number = "2", pages = "167--196", month = jun, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/bibdb.bib; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-2/p167-winslett/p167-winslett.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-2/p167-winslett/; http://www.acm.org/pubs/toc/Abstracts/tods/42386.html", abstract = "Suppose one wishes to construct, use, and maintain a database of facts about the real world, even though the state of that world is only partially known. In the artificial intelligence domain, this problem arises when an agent has a base set of beliefs that reflect partial knowledge about the world, and then tries to incorporate new, possibly contradictory knowledge into this set of beliefs. In the database domain, one facet of this situation is the well-known null values problem. We choose to represent such a database as a logical theory, and view the models of the theory as representing possible states of the world that are consistent with all known information.\par How can new information be incorporated into the database? For example, given the new information that ``$b$ or $c$ is true,'' how can one get rid of all outdated information about $b$ and $c$, add the new information, and yet in the process not disturb any other information in the database? 
In current-day database management systems, the difficult and tedious burden of determining exactly what to add and remove from the database is placed on the user. The goal of our research was to relieve users of that burden, by equipping the database management system with update algorithms that can automatically determine what to add and remove from the database. \par Under our approach, new information about the state of the world is input to the database management system as a well-formed formula that the state of the world is now known to satisfy. We have constructed database update algorithms to interpret this update formula and incorporate the new information represented by the formula into the database without further assistance from the user. In this paper we show how to embed the incomplete database and the incoming information in the language of mathematical logic, explain the semantics of our update operators, and discuss the algorithms that implement these operators.", acknowledgement = ack-nhfb, affiliation = "Univ of Illinois, Urbana, IL, USA", affiliationaddress = "Univ of Illinois, Urbana, IL, USA", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Human Factors; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- algorithms; database systems; database updates; humanfactors; incomplete information; languages; theory; uncertainty, algorithms; update algorithms", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems. {\bf H.3.0}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, General. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Retrieval models. 
{\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving.", } @Article{Eich:1988:DCC, author = "Margaret H. Eich and David L. Wells", title = "Database Concurrency Control using Data Flow Graphs", journal = j-TODS, volume = "13", number = "2", pages = "197--227", month = jun, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "943 408", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-2/p197-eich/p197-eich.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-2/p197-eich/; http://www.acm.org/pubs/toc/Abstracts/tods/42345.html", abstract = "A specialized data flow graph, {\em Database Flow Graph\/} ({\em DBFG\/}) is introduced. DBFGs may be used for scheduling database operations, particularly in an MIMD database machine environment. A DBFG explicitly maintains intertransaction and intratransaction dependencies, and is constructed from the Transaction Flow Graphs (TFG) of active transactions. A TFG, in turn, is the generalization of a query tree used, for example, in DIRECT [15]. \par All DBFG schedules are serializable and deadlock free. Operations needed to create and maintain the DBFG structure as transactions are added or removed from the system are discussed. Simulation results show that DBFG scheduling performs as well as two-phase locking.", acknowledgement = ack-nhfb, affiliation = "Southern Methodist Univ, Dallas, TX, USA", affiliationaddress = "Southern Methodist Univ, Dallas, TX, USA", classification = "723; 921", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data flow graphs; database concurrency control; database systems; deadlock, algorithms; distributed; mathematical techniques --- graph theory; performance; serializability", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Deadlock avoidance. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Batory:1988:ICE, author = "D. S. Batory and T. Y. Leung and T. E. Wise", title = "Implementation Concepts for an Extensible Data Model and Data Language", journal = j-TODS, volume = "13", number = "3", pages = "231--262", month = sep, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: Un. Texas, TR-86-24, Oct. 1986.", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-3/p231-batory/p231-batory.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-3/p231-batory/; http://www.acm.org/pubs/toc/Abstracts/tods/45062.html", abstract = "Future database systems must feature extensible data models and data languages in order to accommodate the novel data types and special-purpose operations that are required by nontraditional database applications. In this paper, we outline a functional data model and data language that are targeted for the semantic interface of GENESIS, an extensible DBMS. 
The model and language are generalizations of FQL [11] and DAPLEX [40], and have an implementation that fits ideally with the modularity required by extensible database technologies. We explore different implementations of functional operators and present experimental evidence that they have efficient implementations. We also explain the advantages of a functional front-end to 1NF databases, and show how our language and implementation are being used to process queries on both 1NF and $ \not $1NF relations.", acknowledgement = ack-nhfb, affiliation = "Univ of Texas at Austin, Austin, TX, USA", affiliationaddress = "Univ of Texas at Austin, Austin, TX, USA", annote = "Illustrates a functional data model of GENESIS. Defines GDM, the data model, and GDL, the data language. GDM productions are stream rewrite rules and GDM computations are expressed as streams of tokens. Promotes extensibility. To build a non-1NF query processor is an example.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming languages; data language; database systems; functional data model; languages; lazy evaluation, TODS genesis functional language join joins lazy eager nested relations, design; Management; performance; stream translators", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML). {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf I.1.3}: Computing Methodologies, ALGEBRAIC MANIPULATION, Languages and Systems, Evaluation strategies. {\bf I.1.3}: Computing Methodologies, ALGEBRAIC MANIPULATION, Languages and Systems, Nonprocedural languages.", } @Article{Apers:1988:DAD, author = "Peter M. G. 
Apers", title = "Data Allocation in Distributed Database Systems", journal = j-TODS, volume = "13", number = "3", pages = "263--304", month = sep, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-3/p263-apers/p263-apers.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-3/p263-apers/; http://www.acm.org/pubs/toc/Abstracts/tods/45063.html", abstract = "The problem of allocating the data of a database to the sites of a communication network is investigated. This problem deviates from the well-known file allocation problem in several aspects. First, the objects to be allocated are not known a priori; second, these objects are accessed by schedules that contain transmissions between objects to produce the result. A model that makes it possible to compare the cost of allocations is presented; the cost can be computed for different cost functions and for processing schedules produced by arbitrary query processing algorithms. \par For minimizing the total transmission cost, a method is proposed to determine the fragments to be allocated from the relations in the conceptual schema and the queries and updates executed by the users.\par For the same cost function, the complexity of the data allocation problem is investigated. Methods for obtaining optimal and heuristic solutions under various ways of computing the cost of an allocation are presented and compared.\par Two different approaches to the allocation management problem are presented and their merits are discussed.", acknowledgement = ack-nhfb, affiliation = "Vrije Univ, Enschede, Neth", affiliationaddress = "Vrije Univ, Enschede, Neth", annote = "Fragments are allocated. 
The strength of the paper is its rigor, the weakness is in the applicability of the model.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Measurement; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer networks; computer software --- software engineering; data allocation; database systems; design; distributed; dynamic schedules; greedy method; heuristic allocations, algorithms; measurement; optimal allocations; static schedules; theory", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf D.2.8}: Software, SOFTWARE ENGINEERING, Metrics, Performance measures.", } @Article{Storey:1988:MCU, author = "Veda C. Storey and Robert C. 
Goldstein", title = "A Methodology for Creating User Views in Database Design", journal = j-TODS, volume = "13", number = "3", pages = "305--338", month = sep, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-3/p305-storey/p305-storey.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-3/p305-storey/; http://www.acm.org/pubs/toc/Abstracts/tods/45064.html", abstract = "The View Creation System (VCS) is an expert system that engages a user in a dialogue about the information requirements for some application, develops an Entity-Relationship model for the user's database view, and then converts the E-R model to a set of Fourth Normal Form relations. This paper describes the knowledge base of VCS. That is, it presents a formal methodology, capable of mechanization as a computer program, for accepting requirements from a user, identifying and resolving inconsistencies, redundancies, and ambiguities, and ultimately producing a normalized relational representation. Key aspects of the methodology are illustrated by applying VCS's knowledge base to an actual database design task.", acknowledgement = ack-nhfb, affiliation = "Univ of Rochester, Rochester, NY, USA", affiliationaddress = "Univ of Rochester, Rochester, NY, USA", annote = "Engages the user in a dialogue about the information requirements for some application, develops an E-R model and Fourth Normal Form relations.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming; database systems; design; expert system; knowledge base; user views, design; view creation system (VCS)", subject = "{\bf I.2.1}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Applications and Expert Systems. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design. {\bf H.2.7}: Information Systems, DATABASE MANAGEMENT, Database Administration.", } @Article{Diederich:1988:NMF, author = "Jim Diederich and Jack Milton", title = "New Methods and Fast Algorithms for Database Normalization", journal = j-TODS, volume = "13", number = "3", pages = "339--365", month = sep, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 072 196", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", note = "Also published in/as: UCD, Math, revised Jun. 1987.", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-3/p339-diederich/p339-diederich.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-3/p339-diederich/; http://www.acm.org/pubs/toc/Abstracts/tods/44499.html", abstract = "A new method for computing minimal covers is presented using a new type of closure that allows significant reductions in the number of closures computed for normalizing relations. Benchmarks are reported comparing the new and the standard techniques.", acknowledgement = ack-nhfb, affiliation = "Univ of California, Davis, CA, USA", affiliationaddress = "Univ of California, Davis, CA, USA", annote = "Modify existing algorithms to make the process faster.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- algorithms; database normalization; database systems; design; fast algorithms; functional dependency; management; redundant dependencies; relational database, algorithms", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms.", } @Article{Larson:1988:LHS, author = "Per-{\AA}ke Larson", title = "Linear Hashing with Separators --- {A} Dynamic Hashing Scheme Achieving One-Access Retrieval", journal = j-TODS, volume = "13", number = "3", pages = "366--388", month = sep, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-3/p366-larson/p366-larson.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-3/p366-larson/; http://www.acm.org/pubs/toc/Abstracts/tods/44500.html", abstract = "A new dynamic hashing scheme is presented. Its most outstanding feature is that any record can be retrieved in exactly one disk access. This is achieved by using a small amount of supplemental internal storage that stores enough information to uniquely determine the current location of any record. The amount of internal storage required is small: typically one byte for each page of the file. The necessary address computation, insertion, and expansion algorithms are presented and the performance is studied by means of simulation. 
The new method is the first practical method offering one-access retrieval for large dynamic files.", acknowledgement = ack-nhfb, affiliation = "Univ of Waterloo, Waterloo, Ont, Can", affiliationaddress = "Univ of Waterloo, Waterloo, Ont, Can", annote = "A small amount of supplemental internal storage stores enough information to uniquely determine the current location of any record.", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "address computation; data processing --- File organization; design; dynamic hashing schemes; extendible hashing; linear hashing; linear probing; Management; one-access retrieval; open addressing, algorithms; open addressing, database systems; performance", review = "ACM CR 8811-0850", subject = "{\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, Access methods. {\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, File organization. {\bf E.5}: Data, FILES, Organization/structure. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods.", } @Article{Roth:1988:EAC, author = "Mark A. Roth and Henry F. 
Korth and Abraham Silberschatz", title = "Extended Algebra and Calculus for Nested Relational Databases", journal = j-TODS, volume = "13", number = "4", pages = "389--417", month = dec, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 072 197", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/database.bib; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", note = "See comment \cite{Tansel:1992:MRH}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-4/p389-roth/p389-roth.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-4/p389-roth/; http://www.acm.org/pubs/toc/Abstracts/tods/49347.html", abstract = "Relaxing the assumption that relations are always in First-Normal-Form (1NF) necessitates a reexamination of the fundamentals of relational database theory. In this paper we take a first step towards unifying the various theories of $ \not $1NF databases. We start by determining an appropriate model to couch our formalisms in. We then define an extended relational calculus as the theoretical basis for our $ \not $1NF relational calculus. We define a class of $ \not $1NF relations with certain ``good'' properties and extend our algebra operators to work within this domain. We prove certain desirable equivalences that hold only if we restrict our language to this domain.", acknowledgement = ack-nhfb, affiliation = "Texas Univ., Austin, TX, USA", affiliationaddress = "Austin, TX, USA", annote = "Only two new operators, nest and unnest.", classification = "723; 921", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "languages; mathematical techniques --- algebra; nested relational databases; non-first normal form database; partitioned normal forms; query languages; relational; relational calculus; theory, extended algebra and calculus, nested relations, non-first normal form, partitioned normal form, database systems", owner = "curtis", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages.", } @Article{Gadia:1988:HRM, author = "Shashi K. Gadia", title = "A Homogeneous Relational Model and Query Languages for Temporal Databases", journal = j-TODS, volume = "13", number = "4", pages = "418--448", month = dec, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 072 198", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-4/p418-gadia/p418-gadia.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-4/p418-gadia/; http://www.acm.org/pubs/toc/Abstracts/tods/50065.html", abstract = "In a temporal database, time values are associated with data items to indicate their periods of validity. We propose a model for temporal databases within the framework of the classical database theory. Our model is realized as a temporal parameterization of static relations. We do not impose any restrictions upon the schemes of temporal relations. 
The classical concepts of normal forms and dependencies are easily extended to our model, allowing a suitable design for a database scheme. We present a relational algebra and a tuple calculus for our model and prove their equivalence. Our data model is homogeneous in the sense that the periods of validity of all the attributes in a given tuple of a temporal relation are identical. We discuss how to relax the homogeneity requirement to extend the application domain of our approach.", acknowledgement = ack-nhfb, affiliation = "Iowa State Univ., Ames, IA, USA", affiliationaddress = "Ames, IA, USA", annote = "temporal tuples and temporal relations; the temporal domain varies from tuple to tuple but it is constant with respect to attributes.", classification = "723; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; languages; mathematical techniques --- algebra; query languages; relational; relational algebra; relational calculus; temporal databases; theory, database systems; tuple calculus", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. 
{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages.", } @Article{Brosda:1988:URR, author = "Volkert Brosda and Gottfried Vossen", title = "Update and Retrieval in a Relational Database Through a Universal Schema Interface", journal = j-TODS, volume = "13", number = "4", pages = "449--485", month = dec, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 072 199", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-4/p449-brosda/p449-brosda.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-4/p449-brosda/; http://www.acm.org/pubs/toc/Abstracts/tods/49884.html", abstract = "A database system that is based on the universal relation (UR) model aims at freeing its users from specifying access paths on both the physical and on the logical levels. All information about the logical structure of the database (i.e., its conceptual scheme) is hidden from users; they need only to know the attribute names, which now carry all the semantics of the database.\par Previous work on UR interfaces has concentrated on the design and implementation of query languages that serve to facilitate retrieval of data from a relational database. 
On the other hand, updates are always handled as before, which means that users still have to know the logical structure of the database in case they want to insert, delete, or modify tuples.\par In this paper the concepts underlying a UR interface, which is really ``universal,'' are presented; it is based on the UR model, and it permits not only queries but also updates: Combinations of attributes that may participate in an update-operation (``objects'') have to be specified during the design phase of the database, and are then embodied into the database scheme by an extended synthesis algorithm. They form the basis for any insertion or deletion operation. A precise definition of ``insertable'' tuples, and of the insert- and delete-operation in this new context, is given. It is then shown that these operations modify a database state in such a way that a representative instance always exists. This is accomplished by providing a more detailed version of Sagiv's uniqueness condition and by exploring the structure of nonunique objects. Since the underlying database always has a representative instance, this instance can be used to define the window function for retrieval. It is shown that it is still possible to compute windows by a union of minimal extension joins.", acknowledgement = ack-nhfb, affiliation = "Rheinisch-Westfalische Tech. Hochschule Aachen, West Germany", affiliationaddress = "Aachen, West Ger", classification = "723; 903", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer interfaces; database systems; database updates; information science --- information retrieval; languages; minimal extension joins; relational; theory; universal relation interface; universal scheme interfaces, design", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. 
{\bf D.2.2}: Software, SOFTWARE ENGINEERING, Tools and Techniques, User interfaces. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML). {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Gottlob:1988:PUS, author = "Georg Gottlob and Paolo Paolini and Roberto Zicari", title = "Properties and Update Semantics of Consistent Views", journal = j-TODS, volume = "13", number = "4", pages = "486--524", month = dec, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 072 200", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-4/p486-gottlob/p486-gottlob.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-4/p486-gottlob/; http://www.acm.org/pubs/toc/Abstracts/tods/50068.html", abstract = "The problem of translating view updates to database updates is considered. Both databases and views are modeled as data abstractions. A data abstraction consists of a set of states and of a set of primitive update operators representing state transition functions. It is shown how complex update programs can be built from primitive update operators and how view update programs are translated into database update programs. 
Special attention is paid to a class of views that we call ``consistent.'' Loosely speaking, a consistent view is a view with the following property: If the effect of a view update program on a view state is determined, then the effect of the corresponding database update is unambiguously determined. Thus, in order to know how to translate a given view update into a database update, it is sufficient to be aware of a functional specification of such a program. We show that consistent views have a number of interesting properties with respect to the concurrency of (high-level) update transactions. Moreover we show that the class of consistent views includes as a subset the class of views that translate updates under maintenance of a constant complement. However, we show that there exist consistent views that do not translate under constant complement. The results of Bancilhon and Spyratos [6] are generalized in order to capture the update semantics of the entire class of consistent views. In particular we show that the class of consistent views is obtained if we relax the requirement of a constant complement by allowing the complement to decrease according to a suitable partial order.", acknowledgement = ack-nhfb, affiliation = "Inst. for Appl. Math., CNR, Genoa, Italy", affiliationaddress = "Genoa, Italy", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency, design; consistent views; data abstractions; database systems; theory; update semantics; view updates", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems.", } @Article{Lomet:1988:SBD, author = "David B. 
Lomet", title = "A Simple Bounded Disorder File Organization with Good Performance", journal = j-TODS, volume = "13", number = "4", pages = "525--551", month = dec, year = "1988", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1988-13-4/p525-lomet/p525-lomet.pdf; http://www.acm.org/pubs/citations/journals/tods/1988-13-4/p525-lomet/; http://www.acm.org/pubs/toc/Abstracts/tods/50067.html", abstract = "A bounded-disorder (BD) file is one in which data are organized into nodes that are indexed, e.g., by means of a B-tree. The data nodes are multibucket nodes that are accessed by hashing. In this paper we present two important improvements to the BD organization as originally described. First, records in a data node that overflow their designated primary bucket are stored in a single overflow bucket which is itself a bucket of the data node. Second, when file space needs to be increased, partial expansions are used that employ elastic buckets. Analysis and simulation results demonstrate that this variant of the BD organization has utilization, random access performance, and file growth performance that can be competitive with good extendible hashing methods, while supporting high-performance sequential access. The simplicity of the organization results in simple algorithms for realizing the organization.", acknowledgement = ack-nhfb, affiliation = "Digital Equip. Corp., Nashua, NH, USA", annote = "Index entries refer to large data nodes which are treated as separate hashed files.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "bounded disorder file organization; computer programming --- algorithms; computer simulation; data processing; design; dynamic files; file organization; index sequential access; indexed files; performance; storage management, algorithms", review = "ACM CR 8904-0253", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization. {\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, Access methods. {\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, File organization.", } @Article{Freytag:1989:TRQ, author = "Johann Christoph Freytag and Nathan Goodman", title = "On the Translation of Relational Queries into Iterative Programs", journal = j-TODS, volume = "14", number = "1", pages = "1--27", month = mar, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-1/p1-freytag/p1-freytag.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-1/p1-freytag/", abstract = "This paper investigates the problem of translating set-oriented query specifications into iterative programs. The translation uses techniques of functional programming and program transformation.\par We present two algorithms that generate iterative programs from algebra-based query specifications. The first algorithm translates query specifications into recursive programs. 
Those are simplified by sets of transformation rules before the algorithm generates the final iterative form. The second algorithm uses a two-level translation that generates iterative programs faster than the first algorithm. On the first level a small set of transformation rules performs structural simplification before the functional combination on the second level yields the final iterative form.", acknowledgement = ack-nhfb, affiliation = "Eur. Comput. Ind. Res. Centre, Munich, West Germany", affiliationaddress = "Cambridge, MA, USA", annote = "Functional programming is used for the formulation of simple algebraic rules", classification = "723; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algebraic specification; algorithms; computer programming --- algorithms; functional programming; iterative programs; languages; mathematical techniques --- algebra; performance; program transformation; relational; relational queries; theory, database systems", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf D.1.1}: Software, PROGRAMMING TECHNIQUES, Applicative (Functional) Programming. {\bf I.2.2}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Automatic Programming, Program transformation. {\bf H.2.5}: Information Systems, DATABASE MANAGEMENT, Heterogeneous Databases, Program translation.", } @Article{Ahad:1989:ECP, author = "Rafiul Ahad and K. V. 
{Bapa Rao} and Dennis McLeod", title = "On Estimating the Cardinality of the Projection of a Database Relation", journal = j-TODS, volume = "14", number = "1", pages = "28--40", month = mar, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-1/p28-ahad/p28-ahad.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-1/p28-ahad/; http://www.acm.org/pubs/toc/Abstracts/tods/62034.html", abstract = "We present an analytical formula for estimating the cardinality of the projection on certain attributes of a subset of a relation in a relational database. This formula takes into account a priori knowledge of the semantics of the real-world objects and relationships that the database is intended to represent. Experimental testing of the formula shows that it has an acceptably low percentage error, and that its worst-case error is smaller than the best-known formula. Furthermore, the formula presented here has the advantage that it does not require a scan of the relation.", acknowledgement = ack-nhfb, affiliation = "Maryland Univ., College Park, MD, USA", affiliationaddress = "College Park, MD, USA", annote = "Uses normal distribution estimates.", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Experimentation; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "application semantics; cardinality estimation, data semantics selectivity estimation TODS, design; computer simulation; data semantics; database systems; experimentation; measurement; performance; relational", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Ramesh:1989:VDT, author = "R. Ramesh and A. J. G. Babu and J. Peter Kincaid", title = "Variable-Depth Trie Index Optimization: Theory and Experimental Results", journal = j-TODS, volume = "14", number = "1", pages = "41--74", month = mar, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-1/p41-ramesh/p41-ramesh.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-1/p41-ramesh/; http://www.acm.org/pubs/toc/Abstracts/tods/77249.html", abstract = "We develop an efficient approach to Trie index optimization. A {\em Trie\/} is a data structure used to index a file having a set of attributes as record identifiers. In the proposed methodology, a file is horizontally partitioned into subsets of records using a Trie index whose depth of indexing is allowed to vary. The retrieval of a record from the file proceeds by ``stepping through'' the index to identify a subset of records in the file in which a binary search is performed. 
This paper develops a taxonomy of optimization problems underlying variable-depth Trie index construction. All these problems are solvable in polynomial time, and their characteristics are studied. Exact algorithms and heuristics for their solution are presented. The algorithms are employed in CRES-an expert system for editing written narrative material, developed for the Department of the Navy. CRES uses several large-to-very-large dictionary files for which Trie indexes are constructed using these algorithms. Computational experience with CRES shows that search and retrieval using variable-depth Trie indexes can be as much as six times faster than pure binary search. The space requirements of the Tries are reasonable. The results show that the variable-depth Tries constructed according to the proposed algorithms are viable and efficient for indexing large-to-very-large files by attributes in practical applications.", acknowledgement = ack-nhfb, affiliation = "State Univ. of New York, Buffalo, NY, USA", affiliationaddress = "Buffalo, NY, USA", classification = "723; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Experimentation; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "artificial intelligence --- expert systems; computer editing; computer programming --- algorithms; CRES expert system; data processing; data structures; design; experimentation; information science --- information retrieval; optimization; performance; theory; trie index optimization, algorithms", subject = "{\bf E.1}: Data, DATA STRUCTURES, Trees. {\bf E.5}: Data, FILES. {\bf E.2}: Data, DATA STORAGE REPRESENTATIONS, Contiguous representations. {\bf H.3.1}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Content Analysis and Indexing. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Search process. 
{\bf E.2}: Data, DATA STORAGE REPRESENTATIONS, Linked representations.", } @Article{Gladney:1989:DRD, author = "H. M. Gladney", title = "Data Replicas in Distributed Information Services", journal = j-TODS, volume = "14", number = "1", pages = "75--97", month = mar, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; Distributed/CCR.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "{\it Reviews}: Computing Reviews, Vol. 30, No. 11, November 1989", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-1/p75-gladney/p75-gladney.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-1/p75-gladney/; http://www.acm.org/pubs/toc/Abstracts/tods/62035.html", abstract = "In an information distribution network in which records are repeatedly read, it is cost-effective to keep read-only copies in work locations. This paper presents a method of updating replicas that need not be immediately synchronized with the source data or with each other. The method allows an arbitrary mapping from source records to replica records. It is fail-safe, maximizes workstation autonomy, and is well suited to a network with slow, unreliable, and/or expensive communications links.\par The algorithm is a manipulation of queries, which are represented as short encodings. When a response is generated, we record which portion of the source database was used. Later, when the source data are updated, this information is used to identify obsolete replicas. For each workstation, the identity of obsolete replicas is saved until a workstation process asks for this information. This workstation process deletes each obsolete replica, and replaces it by an up-to-date version either promptly or the next time the application asks for this particular item. 
Throughout, queries are grouped so that the impact of each source update transaction takes effect atomically at each workstation.\par Optimizations of the basic algorithm are outlined. These overlap change dissemination with user service, allow the mechanism to be hidden within the data delivery subsystem, and permit very large networks.", acknowledgement = ack-nhfb, affiliation = "IBM Almaden Res. Center, San Jose, CA, USA", affiliationaddress = "San Jose, CA, USA", annote = "Server and workstations.", classification = "723; 903; 921", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Languages; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- algorithms; design; distributed; information services; languages; optimization, algorithms; performance; reliability; reviews, database systems", subject = "{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. 
{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems.", } @Article{McLeish:1989:FRS, author = "Mary McLeish", title = "Further Results on the Security of Partitioned Dynamic Statistical Databases", journal = j-TODS, volume = "14", number = "1", pages = "98--113", month = mar, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-1/p98-mcleish/p98-mcleish.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-1/p98-mcleish/; http://www.acm.org/pubs/toc/Abstracts/tods/62036.html", abstract = "Partitioning is a highly secure approach to protecting statistical databases. When updates are introduced, security depends on putting restrictions on the sizes of partition sets which may be queried. To overcome this problem, attempts have been made to add ``dummy'' records. Recent work has shown that this leads to high information loss.\par This paper reconsiders the restrictions on the size of partitioning sets required to achieve a high level of security. Updates of two records at a time were studied earlier, and security was found to hold if the sizes of the partition sets were kept even. In this paper an extended model is presented, allowing very general updates to be performed. The security problem is thoroughly studied, giving if and only if conditions. The earlier result is shown to be part of a corollary to the main theorem of this paper. 
Alternatives to adding dummy records are presented and the practical implications of the theory for the database manager are discussed.", acknowledgement = ack-nhfb, affiliation = "Guelph Univ., Ont., Canada", affiliationaddress = "Guelph, Ont, Can", classification = "723; 922", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Security; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data processing --- security of data; database security, algorithms; database systems; partitioned databases; performance; security; statistical databases; statistical methods; theory", subject = "{\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf G.3}: Mathematics of Computing, PROBABILITY AND STATISTICS. {\bf K.4.1}: Computing Milieux, COMPUTERS AND SOCIETY, Public Policy Issues, Privacy.", } @Article{Bic:1989:ADD, author = "Lubomir Bic and Robert L. Hartmann", title = "{AGM}: a Dataflow Database Machine", journal = j-TODS, volume = "14", number = "1", pages = "114--146", month = mar, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-1/p114-bic/p114-bic.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-1/p114-bic/; http://www.acm.org/pubs/toc/Abstracts/tods/62037.html", abstract = "In recent years, a number of database machines consisting of large numbers of parallel processing elements have been proposed. 
Unfortunately, there are two main limitations in database processing that prevent a high degree of parallelism; these are the available I/O bandwidth of the underlying storage devices and the concurrency control mechanisms necessary to guarantee data integrity. The main problem with conventional approaches is the lack of a computational model capable of utilizing the potential of any significant number of processing elements and storage devices and, at the same time, preserving the integrity of the database.\par This paper presents a database model and its associated architecture, which is based on the principles of data-driven computation. According to this model, the database is represented as a network in which each node is conceptually an independent, asynchronous processing element, capable of communicating with other nodes by exchanging messages along the network arcs. To answer a query, one or more such messages, called tokens, are created and injected into the network. These then propagate asynchronously through the network in search of results satisfying the given query.\par The asynchronous nature of processing permits the model to be mapped onto a computer architecture consisting of large numbers of independent disk units and processing elements. This increases both the available I/O bandwidth as well as the processing potential of the machine. At the same time, new concurrency control and error recovery mechanisms are necessary to cope with the resulting parallelism.", acknowledgement = ack-nhfb, affiliation = "California Univ., Irvine, CA, USA", affiliationaddress = "Irvine, CA, USA", classification = "722; 723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Active Graph Machine, design; AGM Model; Computer Architecture; Database Machines; Database Systems; Dataflow Computing; hardware support token flow on multi-processor data connection graph TODS; languages; Models; performance", subject = "{\bf H.2.6}: Information Systems, DATABASE MANAGEMENT, Database Machines. {\bf C.1.2}: Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Parallel processors. {\bf C.1.3}: Computer Systems Organization, PROCESSOR ARCHITECTURES, Other Architecture Styles, Data-flow architectures. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques.", } @Article{Yu:1989:FER, author = "C. T. Yu and W. Meng and S. Park", title = "A Framework for Effective Retrieval", journal = j-TODS, volume = "14", number = "2", pages = "147--167", month = jun, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P20", MRnumber = "1 072 201", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-2/p147-yu/p147-yu.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-2/p147-yu/; http://www.acm.org/pubs/toc/Abstracts/tods/63519.html", abstract = "The aim of an effective retrieval system is to yield high recall and precision (retrieval effectiveness). The nonbinary independence model, which takes into consideration the number of occurrences of terms in documents, is introduced. 
It is shown to be optimal under the assumption that terms are independent. It is verified by experiments to yield significant improvement over the binary independence model. The nonbinary model is extended to normalized vectors and is applicable to more general queries.\par Various ways to alleviate the consequences of the term independence assumption are discussed. Estimation of parameters required for the nonbinary independence model is provided, taking into consideration that a term may have different meanings.", acknowledgement = ack-nhfb, acmcrnumber = "8912-0901", affiliation = "Dept. of Electr. Eng. and Comput. Sci., Illinois Univ., Chicago, IL, USA", affiliationaddress = "Chicago, IL, USA", annote = "probabilistic models with parameters estimated from previously retrieved relevant and irrelevant documents.", classification = "723; 903; 921; 922", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Database Systems; Information Retrieval; Information Science; Mathematical Techniques --- Estimation; Nonbinary Independence Model; Parameter Estimation; Retrieval Effectiveness", subject = "{\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Retrieval models. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Query formulation. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Clustering.", } @Article{Embley:1989:NNF, author = "David W. 
Embley", title = "{NFQL}: The {Natural Forms Query Language}", journal = j-TODS, volume = "14", number = "2", pages = "168--211", month = jun, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: Brigham Young Un., TR-CS-87-6, Mar. 1987.", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-2/p168-embley/p168-embley.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-2/p168-embley/; http://www.acm.org/pubs/toc/Abstracts/tods/64125.html", abstract = "A means by which ordinary forms can be exploited to provide a basis for nonprocedural specification of information processing is discussed. The Natural Forms Query Language (NFQL) is defined. In NFQL data retrieval requests and computation specifications are formulated by sketching ordinary forms to show what data are desired and update operations are specified by altering data on filled-in forms. The meaning of a form depends on a store of knowledge that includes extended abstract data types for defining elementary data items, a database scheme defined by an entity-relationship model, and a conceptual model of an ordinary form. Based on this store of knowledge, several issues are addressed and resolved in the context of NFQL. These issues include automatic generation of query expressions from weak specifications, the view update problem, power and completeness, and a heuristic approach to resolving computational relationships. A brief status report of an implementation of NFQL is also given.", acknowledgement = ack-nhfb, affiliation = "Brigham Young Univ., Provo, UT, USA", affiliationaddress = "Provo, UT, USA", annote = "Uses surrogate keys in the ER model.", classification = "723", ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database systems; entity-relationship model; forms oriented interfaces; informal software specification; information science --- information retrieval; languages; natural forms query language; nonprocedural specification; query languages; relational database, algorithms; theory", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, NFQL. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.4.1}: Information Systems, INFORMATION SYSTEMS APPLICATIONS, Office Automation. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Query formulation. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data description languages (DDL). {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML). {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages.", } @Article{Atzeni:1989:EOS, author = "Paolo Atzeni and Edward P. F. 
Chan", title = "Efficient Optimization of Simple Chase Join Expressions", journal = j-TODS, volume = "14", number = "2", pages = "212--230", month = jun, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 072 202", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-2/p212-atzeni/p212-atzeni.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-2/p212-atzeni/; http://www.acm.org/pubs/toc/Abstracts/tods/63520.html", abstract = "Simple chase join expressions are relational algebra expressions, involving only projection and join operators, defined on the basis of the functional dependencies associated with the database scheme. They are meaningful in the weak instance model, because for certain classes of schemes, including independent schemes, the total projections of the representative instance can be computed by means of unions of simple chase join expressions. We show how unions of simple chase join expressions can be optimized efficiently, without constructing and chasing the corresponding tableaux. We also present efficient algorithms for testing containment and equivalence, and for optimizing individual simple chase join expressions.", acknowledgement = ack-nhfb, affiliation = "IASI-CNR, Rome, Italy", affiliationaddress = "Rome, Italy", classification = "723", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer programming --- algorithms; containment testing; database systems; equivalence testing, algorithms; functional dependencies; languages; optimization; performance; relational; relational algebra expressions; simple chase join expressions; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms.", } @Article{Ramakrishna:1989:FOU, author = "M. V. Ramakrishna and Per-{\AA}ke Larson", title = "File Organization Using Composite Perfect Hashing", journal = j-TODS, volume = "14", number = "2", pages = "231--263", day = "1", month = jun, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; UnCover library database", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-2/p231-ramakrishna/p231-ramakrishna.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-2/p231-ramakrishna/; http://www.acm.org/pubs/toc/Abstracts/tods/63521.html", abstract = "Perfect hashing refers to hashing with no overflows. We propose and analyze a composite perfect hashing scheme for large external files. The scheme guarantees retrieval of any record in a single disk access. Insertions and deletions are simple, and the file size may vary considerably without adversely affecting the performance. A simple variant of the scheme supports efficient range searches in addition to being a completely dynamic file organization scheme. 
These advantages are achieved at the cost of a small amount of additional internal storage and increased cost of insertions.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Michigan State Univ., East Lansing, MI, USA", affiliationaddress = "East Lansing, MI, USA", classification = "723; 903", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "composite perfect hashing; data processing; database systems; design; dynamic file organization, algorithms; experimentation; file organization; information science --- information retrieval; large external files; performance; range searches", subject = "{\bf E.2}: Data, DATA STORAGE REPRESENTATIONS, Hash-table representations. {\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems, Sorting and searching. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods.", } @Article{ElAbbadi:1989:MAP, author = "Amr {El Abbadi} and Sam Toueg", title = "Maintaining Availability in Partitioned Replicated Databases", journal = j-TODS, volume = "14", number = "2", pages = "264--290", month = jun, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 072 203", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Compendex database; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: ACM SIGACT-SIGMOD Symp. on Principles of Database Systems, Cambridge MA, Mar.
1986.", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-2/p264-abbadi/p264-abbadi.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-2/p264-abbadi/; http://www.acm.org/pubs/toc/Abstracts/tods/63501.html", abstract = "In a replicated database, a data item may have copies residing on several sites. A replica control protocol is necessary to ensure that data items with several copies behave as if they consist of a single copy, as far as users can tell. We describe a new replica control protocol that allows the accessing of data in spite of site failures and network partitioning. This protocol provides the database designer with a large degree of flexibility in deciding the degree of data availability, as well as the cost of accessing data.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Cornell Univ., Ithaca, NY, USA", affiliationaddress = "Ithaca, NY, USA", classification = "723", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "computer operating systems; concurrency control; database availability, algorithms; database systems; design; distributed; partitioned replicated databases; partitioning failures; performance; reliability; replica control; serializability", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf D.4.5}: Software, OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf C.2.2}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols. 
{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods.", } @Article{Hudson:1989:CSA, author = "Scott E. Hudson and Roger King", title = "{Cactis}: a Self-Adaptive, Concurrent Implementation of an Object-Oriented Database Management System", journal = j-TODS, volume = "14", number = "3", pages = "291--321", month = sep, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-3/p291-hudson/p291-hudson.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-3/p291-hudson/; http://www.acm.org/pubs/toc/Abstracts/tods/68013.html", abstract = "Cactis is an object-oriented, multiuser DBMS developed at the University of Colorado. The system supports functionally-defined data and uses techniques based on attributed graphs to optimize the maintenance of functionally-defined data. The implementation is self-adaptive in that the physical organization and the update algorithms dynamically change in order to reduce disk access. The system is also concurrent. At any given time there are some number of computations that must be performed to bring the database up to date; these computations are scheduled independently and are performed when the expected cost to do so is minimal. The DBMS runs in the Unix/C Sun workstation environment. Cactis is designed to support applications that require rich data modeling capabilities and the ability to specify functionally-defined data, but that also demand good performance. Specifically, Cactis is intended for use in the support of such applications as VLSI and PCB design, and software environments.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. 
Sci., Arizona Univ., Tucson, AZ, USA", annote = "attributes of objects can be derived; dynamic definition of a scheme, depending on the user's requirements functionally defined data, intelligent update algorithms", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; clustering derived update propagation TODS; design; performance", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Cactis. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design. {\bf J.6}: Computer Applications, COMPUTER-AIDED ENGINEERING.", } @Article{Sheard:1989:AVD, author = "Tim Sheard and David Stemple", title = "Automatic Verification of Database Transaction Safety", journal = j-TODS, volume = "14", number = "3", pages = "322--368", month = sep, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: U. Mass, COINS TR-88-29, Apr. 1988, also TR-86-30, 1986.", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-3/p322-sheard/p322-sheard.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-3/p322-sheard/; http://www.acm.org/pubs/toc/Abstracts/tods/68014.html", abstract = "Maintaining the integrity of databases is one of the promises of database management systems. This includes assuring that integrity constraints are invariants of database transactions. This is very difficult to accomplish efficiently in the presence of complex constraints and large amounts of data. 
One way to minimize the amount of processing required to maintain database integrity over transaction processing is to prove at compile-time that transactions cannot, if run atomically, disobey integrity constraints. We report on a system that performs such verification for a robust set of constraint and transaction classes. The system accepts database schemas written in a more or less traditional style and accepts programs in a high-level programming language. Automatic verification fast enough to be effective on current workstation hardware is performed.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. and Inf. Sci., Massachusetts Univ., Amherst, MA, USA", annote = "LISP; inference techniques based on Boyer--Moore.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Reliability; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; reliability; verification", subject = "{\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf D.2.4}: Software, SOFTWARE ENGINEERING, Program Verification.", } @Article{Blakeley:1989:UDR, author = "Jos{\'e} A. 
Blakeley and Neil Coburn and Per-{\AA}ke Larson", title = "Updating Derived Relations: Detecting Irrelevant and Autonomously Computable Updates", journal = j-TODS, volume = "14", number = "3", pages = "369--400", month = sep, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 073 202", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: Indiana Un., TR-235, Nov. 1987. Also published in \cite{Kambayashi:1986:TIC}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-3/p369-blakeley/p369-blakeley.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-3/p369-blakeley/; http://www.acm.org/pubs/toc/Abstracts/tods/68015.html", abstract = "Consider a database containing not only base relations but also stored derived relations (also called materialized or concrete views). When a base relation is updated, it may also be necessary to update some of the derived relations. This paper gives sufficient and necessary conditions for detecting when an update of a base relation cannot affect a derived relation (an irrelevant update), and for detecting when a derived relation can be correctly updated using no data other than the derived relation itself and the given update operation (an autonomously computable update). The class of derived relations considered is restricted to those defined by {\em PSJ\/}-expressions, that is, any relational algebra expressions constructed from an arbitrary number of project, select and join operations (but containing no self-joins). 
The class of update operations consists of insertions, deletions, and modifications, where the set of tuples to be deleted or modified is specified by a selection condition on attributes of the relation being updated.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Waterloo Univ., Ont., Canada", annote = "Identity implementation; rigorous mathematical proofs; expressions that are capable of being tested constitute a large and commonly occurring class.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; performance; theory", subject = "{\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods.", } @Article{Mackert:1989:ISU, author = "Lothar F. Mackert and Guy M. Lohman", title = "Index Scans Using a Finite {LRU} Buffer: a Validated {I/O} Model", journal = j-TODS, volume = "14", number = "3", pages = "401--424", month = sep, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: IBM, TR-RC-4836, Sep. 
1985.", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-3/p401-mackert/p401-mackert.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-3/p401-mackert/; http://www.acm.org/pubs/toc/Abstracts/tods/68016.html", abstract = "Indexes are commonly employed to retrieve a portion of a file or to retrieve its records in a particular order. An accurate performance model of indexes is essential to the design, analysis, and tuning of file management and database systems, and particularly to database query optimization. Many previous studies have addressed the problem of estimating the number of disk page fetches when randomly accessing $k$ records out of $N$ given records stored on $T$ disk pages. This paper generalizes these results, relaxing two assumptions that usually do not hold in practice: unlimited buffer and unique records for each key value. Experiments show that the performance of an index scan is very sensitive to buffer size limitations and multiple records per key value. A model for these more practical situations is presented and a formula derived for estimating the performance of an index scan. We also give a closed-form approximation that is easy to compute. The theoretical results are validated using the $R$ * distributed relational database system. Although we use database terminology throughout the paper, the model is more generally applicable whenever random accesses are made using keys.", acknowledgement = ack-nhfb, affiliation = "IBM Almaden Res. Center, San Jose, CA, USA", annote = "consider limited buffer and now unique records for each key value.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Measurement; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; measurement; performance; theory, clustering non-clustering B-tree lookup cost Starburst System R IBM Almaden TODS", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Retrieval models. {\bf H.3.1}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Content Analysis and Indexing, Indexing methods. {\bf E.5}: Data, FILES, Organization/structure. {\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS, Performance attributes.", } @Article{Manolopoulos:1989:PTH, author = "Y. Manolopoulos and J. G. Kollias", title = "Performance of a Two-Headed Disk System when Serving Database Queries Under the Scan Policy", journal = j-TODS, volume = "14", number = "3", pages = "425--442", month = sep, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-3/p425-manolopoulos/p425-manolopoulos.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-3/p425-manolopoulos/; http://www.acm.org/pubs/toc/Abstracts/tods/68017.html", abstract = "Disk drives with movable two-headed arms are now commercially available. The two heads are separated by a fixed number of cylinders. 
A major problem for optimizing disk head movement, when answering database requests, is the specification of the optimum number of cylinders separating the two heads. An earlier analytical study assumed a FCFS model and concluded that the optimum separation distance should be equal to 0.44657 of the number of cylinders $N$ of the disk. This paper considers that the SCAN scheduling policy is used in file access, and it applies combinatorial analysis to derive exact formulas for the expected head movement. Furthermore, it is proven that the optimum separation distance is $ N / 2 - 1 $ ($ \lceil N / 2 - 1 \rceil $ and $ \lfloor N / 2 - 1 \rfloor $ ) if $N$ is even (odd). In addition, a comparison with a single-headed disk system operating under the same scheduling policy shows that if the two heads are optimally spaced, then the mean seek distance is less than one-half of the value obtained with one head. The fact that the SCAN policy is used for many database applications (for example, batching and secondary key retrieval) demonstrates the potential of two-headed disk systems for improving the performance of database systems.", acknowledgement = ack-nhfb, affiliation = "Dept. of Electr. Eng., Aristotelian Univ. of Thessaloniki, Greece", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; hardware support I/O seek machine TODS, algorithms; performance", subject = "{\bf D.4.2}: Software, OPERATING SYSTEMS, Storage Management, Secondary storage. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems, Sequencing and scheduling.", } @Article{Dreizen:1989:ISR, author = "Howard M.
Dreizen and Shi-Kuo Chang", title = "Imprecise Schema: a Rationale for Relations with Embedded Subrelations", journal = j-TODS, volume = "14", number = "4", pages = "447--479", month = dec, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Misc/is.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-4/p447-dreizen/p447-dreizen.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-4/p447-dreizen/; http://www.acm.org/pubs/toc/Abstracts/tods/76903.html", abstract = "{\em Exceptional conditions\/} are anomalous data which meet the intent of a schema but not the schema definition, represent a small proportion of the database extension, and may become known only after the schema is in use. Admission of exceptional conditions is argued to suggest a representation that locally stretches the schema definition by use of relations with embedded subrelations. Attempted normalization of these relations to 1NF does not yield the static schema typically associated with such transformations. A class of relations, termed Exceptional Condition Nested Form (ECNF), is defined which allows the necessary representation of exceptional conditions while containing sufficient restrictions to prevent arbitrary and chaotic inclusion of embedded subrelations. Queries on a subset of exceptional conditions, the {\em exceptional constraints}, are provided an interpretation via an algorithm that transforms ECNF relations into 1NF relations containing two types of null values. Extensions of relational algebraic operators, suitable for interactive query navigation, are defined for use with ECNF relations containing all forms of exceptional conditions.", acknowledgement = ack-nhfb, affiliation = "Illinois Inst. 
of Technol., Chicago, IL, USA", annote = "If the data contain only a few `exceptional' tuples, their effect might better be confined to a local schema change rather than changing the global schema; the effect of processing is only partly answered.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Motro:1989:IVC, author = "Amihai Motro", title = "Integrity = Validity + Completeness", journal = j-TODS, volume = "14", number = "4", pages = "480--502", month = dec, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-4/p480-motro/p480-motro.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-4/p480-motro/; http://www.acm.org/pubs/toc/Abstracts/tods/76904.html", abstract = "Database integrity has two complementary components: {\em validity}, which guarantees that all false information is excluded from the database, and {\em completeness}, which guarantees that all true information is included in the database. This article describes a uniform model of integrity for relational databases, that considers both validity and completeness. To a large degree, this model subsumes the prevailing model of integrity (i.e., integrity constraints). 
One of the features of the new model is the determination of the integrity of answers issued by the database system in response to user queries. To users, answers that are accompanied with such detailed certifications of their integrity are more meaningful. First, the model is defined and discussed. Then, a specific mechanism is described that implements this model. With this mechanism, the determination of the integrity of an answer is a process analogous to the determination of the answer itself.", acknowledgement = ack-nhfb, affiliation = "Univ. of Southern California, Los Angeles, CA, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; performance; theory", subject = "{\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models.", } @Article{Farrag:1989:USK, author = "Abdel Aziz Farrag and M. Tamer {\"O}zsu", title = "Using Semantic Knowledge of Transactions to Increase Concurrency", journal = j-TODS, volume = "14", number = "4", pages = "503--525", month = dec, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Accepted. Also published in/as: Un. Alberta, DCS, TR-85-11, Jul. 
1985.", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-4/p503-farrag/p503-farrag.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-4/p503-farrag/; http://www.acm.org/pubs/toc/Abstracts/tods/76905.html", abstract = "When the only information available about transactions is syntactic information, serializability is the main correctness criterion for concurrency control. Serializability requires that the execution of each transaction must appear to every other transaction as a single atomic step (i.e., the execution of the transaction cannot be interrupted by other transactions). Many researchers, however, have realized that this requirement is unnecessarily strong for many applications and can significantly increase transaction response time. To overcome this problem, a new approach for controlling concurrency that exploits the semantic information available about transactions to allow controlled nonserializable interleavings has recently been proposed. This approach is useful when the cost of producing only serializable interleavings is unacceptably high. The main drawback of the approach is the extra overhead incurred by utilizing the semantic information. We examine this new approach in this paper and discuss its strengths and weaknesses. We introduce a new formalization for the concurrency control problem when semantic information is available about the transactions. This semantic information takes the form of transaction types, transaction steps, and transaction break-points. We define a new class of ``safe'' schedules called relatively consistent (RC) schedules. This class contains serializable as well as nonserializable schedules. We prove that the execution of an RC schedule cannot violate consistency and propose a new concurrency control mechanism that produces only RC schedules. 
Our mechanism assumes fewer restrictions on the interleavings among transactions than previously introduced semantic-based mechanisms.", acknowledgement = ack-nhfb, affiliation = "Dalhousie Univ., Halifax, NS, Canada", annote = "By setting breakpoints to interleave compatible transactions. But aborts can require rollbacks or offsetting transactions. Compatible transactions can interleave. Nested compatibility in interleaving transactions.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Ozsoyoglu:1989:QPT, author = "Gultekin {\"O}zsoyo{\u{g}}lu and Victor Matos and Z. Meral {\"O}zsoyo{\u{g}}lu", title = "Query Processing Techniques in the Summary-Table-by-Example Database Query Language", journal = j-TODS, volume = "14", number = "4", pages = "526--573", month = dec, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-4/p526-ozsoyoglu/p526-ozsoyoglu.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-4/p526-ozsoyoglu/; http://www.acm.org/pubs/toc/Abstracts/tods/76906.html", abstract = "Summary-Table-by-Example (STBE) is a graphical language suitable for statistical database applications. 
STBE queries have a hierarchical subquery structure and manipulate summary tables and relations with set-valued attributes.\par The hierarchical arrangement of STBE queries naturally implies a tuple-by-tuple subquery evaluation strategy (similar to the nested loops join implementation technique) which may not be the best query processing strategy. In this paper we discuss the query processing techniques used in STBE. We first convert an STBE query into an ``extended'' relational algebra (ERA) expression. Two transformations are introduced to remove the hierarchical arrangement of subqueries so that query optimization is possible. To solve the ``empty partition'' problem of aggregate function evaluation, directional join (one-sided outer-join) is utilized. We give the algebraic properties of the ERA operators to obtain an ``improved'' ERA expression. Finally we briefly discuss the generation of alternative implementations of a given ERA expression. \par STBE is implemented in a prototype statistical database management system. We discuss the STBE-related features of the implemented system.", acknowledgement = ack-nhfb, affiliation = "Case Western Reserve Univ., Cleveland, OH, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; languages; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. 
{\bf G.3}: Mathematics of Computing, PROBABILITY AND STATISTICS, Statistical software.", } @Article{Gardy:1989:EJO, author = "Dani{\`e}le Gardy and Claude Puech", title = "On the Effect of Join Operations on Relation Sizes", journal = j-TODS, volume = "14", number = "4", pages = "574--603", month = dec, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 073 203", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-4/p574-gardy/p574-gardy.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-4/p574-gardy/; http://www.acm.org/pubs/toc/Abstracts/tods/76907.html", abstract = "We propose a generating function approach to the problem of evaluating the sizes of derived relations in a relational database framework. We present a model of relations and show how to use it to deduce probabilistic estimations of derived relation sizes. These are found to asymptotically follow normal distributions under a variety of assumptions.", acknowledgement = ack-nhfb, affiliation = "Univ. Paris-Sud, Orsay, France", annote = "Polynomial generating function is given; derived relation sizes asymptotically follow normal distributions.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; generating functions; performance; selectivity estimation; theory; TODS", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf G.2.1}: Mathematics of Computing, DISCRETE MATHEMATICS, Combinatorics, Generating functions.
{\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems, Computations on discrete structures. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval.", } @Article{Lang:1989:UAB, author = "Sheau-Dong Lang and James R. Driscoll and Jiann H. Jou", title = "A Unified Analysis of Batched Searching of Sequential and Tree-Structured Files", journal = j-TODS, volume = "14", number = "4", pages = "604--618", month = dec, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P10 (68P20)", MRnumber = "1 073 204", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1989-14-4/p604-lang/p604-lang.pdf; http://www.acm.org/pubs/citations/journals/tods/1989-14-4/p604-lang/; http://www.acm.org/pubs/toc/Abstracts/tods/76908.html", abstract = "A direct and unified approach is used to analyze the efficiency of batched searching of sequential and tree-structured files. The analysis is applicable to arbitrary search distributions, and closed-form expressions are obtained for the expected batched searching cost and savings. In particular, we consider a search distribution satisfying Zipf's law for sequential files and four types of uniform (random) search distribution for sequential and tree-structured files. These results unify and extend earlier research on batched searching and estimating block accesses for database systems.", acknowledgement = ack-nhfb, affiliation = "Central Florida Univ., Orlando, FL, USA", annote = "closed-form expressions for the number of accesses needed given arbitrary search distributions.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; performance; theory", subject = "{\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Search process. {\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods.", } @Article{Motro:1989:QDK, author = "A. Motro and Q. Yuan", title = "Querying Database Knowledge", journal = j-TODS, volume = "14", number = "4", pages = "??--??", month = dec, year = "1989", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibsource = "Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in \cite{Garcia-Molina:1990:PAS}.", annote = "The describe statement inquires about the meaning of a concept under specified circumstances", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", xxnote = "This paper does not seem to be published in TODS.", } @Article{Liu:1990:IMI, author = "Ken-Chih
Liu and Rajshekhar Sunderraman", title = "Indefinite and Maybe Information in Relational Databases", journal = j-TODS, volume = "15", number = "1", pages = "1--39", month = mar, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 073 205", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-1/p1-liu/p1-liu.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-1/p1-liu/; http://www.acm.org/pubs/toc/Abstracts/tods/77644.html", abstract = "This paper extends the relational model to represent indefinite and maybe kinds of incomplete information. A data structure, called an I-table, which is capable of representing indefinite and maybe facts, is introduced. The information content of I-tables is precisely defined, and an operator to remove redundant facts is presented. The relational algebra is then extended in a semantically correct way to operate on I-tables. Queries are posed in the same way as in conventional relational algebra; however, the user may now expect indefinite as well as maybe answers.", acknowledgement = ack-nhfb, affiliation = "AT\&T Bell Labs., Naperville, IL, USA", annote = "an I-table is capable of representing indefinite and maybe facts, is introduced; an operator to remove redundant facts is presented. The relational algebra is then extended; user may now expect indefinite as well as maybe answers", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; languages; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. 
{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf E.1}: Data, DATA STRUCTURES, Tables. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval.", } @Article{Langerak:1990:VUR, author = "Rom Langerak", title = "View Updates in Relational Databases with an Independent Scheme", journal = j-TODS, volume = "15", number = "1", pages = "40--66", month = mar, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 073 206", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-1/p40-langerak/p40-langerak.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-1/p40-langerak/; http://www.acm.org/pubs/toc/Abstracts/tods/77645.html", abstract = "A view on a database is a mapping that provides a user or application with a suitable way of looking at the data. Updates specified on a view have to be translated into updates on the underlying database. We study the view update translation problem for a relational data model in which the base relations may contain (indexed) nulls.\par The representative instance is considered to be the correct representation of all data in the database; the class of views that is studied consists of total projections of the representative instance. Only independent database schemes are considered, that is, schemes for which global consistency is implied by local consistency. A view update can be an insertion, a deletion, or a modification of a single view tuple. \par It is proven that the constant complement method of Bancilhon and Spyratos is too restrictive to be useful in this context. Structural properties of extension joins are derived that are important for understanding views. 
On the basis of these properties, minimal algorithms for translating a single view-tuple update are given.", acknowledgement = ack-nhfb, affiliation = "Fac. of Inf., Twente Univ., Enschede, Netherlands", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; theory; views TODS, algorithms", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval.", } @Article{Whang:1990:QOM, author = "Kyu-Young Whang and Ravi Krishnamurthy", title = "Query Optimization in a Memory-Resident Domain Relational Calculus Database System", journal = j-TODS, volume = "15", number = "1", pages = "67--95", month = mar, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-1/p67-whang/p67-whang.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-1/p67-whang/; http://www.acm.org/pubs/toc/Abstracts/tods/77646.html", abstract = "We present techniques for optimizing queries in memory-resident database systems. Optimization techniques in memory-resident database systems differ significantly from those in conventional disk-resident database systems.
In this paper we address the following aspects of query optimization in such systems and present specific solutions for them: (1) a new approach to developing a CPU-intensive cost model; (2) new optimization strategies for main-memory query processing; (3) new insight into join algorithms and access structures that take advantage of memory residency of data; and (4) the effect of the operating system's scheduling algorithm on the memory-residency assumption. We present an interesting result that a major cost of processing queries in memory-resident database systems is incurred by evaluation of predicates. We discuss optimization techniques using the Office-by-Example (OBE) that has been under development at IBM Research. We also present the results of performance measurements, which prove to be excellent in the current state of the art. Despite recent work on memory-resident database systems, query optimization aspects in these systems have not been well studied. We believe this paper opens the issues of query optimization in memory-resident database systems and presents practical solutions to them.", acknowledgement = ack-nhfb, affiliation = "IBM Thomas J. Watson Res. Center, Yorktown Heights, NY, USA", annote = "Office-by-example extends the concept of query-by-example (QBE); disks are used only for permanent storage of data and backup; The technique is not a heuristic since it employs a systematic search, but uses the branch-and-bound algorithm. Uses the nested-loop join with use of indexes. An index is an array of tuple identifiers. Assess uses binary search. When an index entry is inserted, the upper half of the index is block-copied. In a 3081 processor copying 1 MB of memory takes less than 0.1 second. Queries in OBE are in the canonical form, have no substructures. Pure demand paging is not suitable, the system has a global goal for paging activities. The set of virtual machines on the dispatch list is determined.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Experimentation; Languages; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "experimentation; languages; Large Main Memory TODS, algorithms; performance", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods.", } @Article{Herlihy:1990:AVA, author = "Maurice Herlihy", title = "Apologizing Versus Asking Permission: Optimistic Concurrency Control for Abstract Data Types", journal = j-TODS, volume = "15", number = "1", pages = "96--124", month = mar, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68N25 (68P15 68Q65)", MRnumber = "1 073 207", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-1/p96-herlihy/p96-herlihy.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-1/p96-herlihy/; http://www.acm.org/pubs/toc/Abstracts/tods/77647.html", abstract = "An optimistic concurrency control technique is one that allows transactions to execute without synchronization, relying on commit-time validation to ensure serializability. Several new optimistic concurrency control techniques for objects in decentralized distributed systems are described here, their correctness and optimality properties are proved, and the circumstances under which each is likely to be useful are characterized.\par Unlike many methods that classify operations only as Reads or Writes, these techniques systematically exploit type-specific properties of objects to validate more interleavings. 
Necessary and sufficient validation conditions can be derived directly from an object's data type specification. These techniques are also modular: they can be applied selectively on a per-object (or even per-operation) basis in conjunction with standard pessimistic techniques such as two-phase locking, permitting optimistic methods to be introduced exactly where they will be most effective.\par These techniques can be used to reduce the algorithmic complexity of achieving high levels of concurrency, since certain scheduling decisions that are NP-complete for pessimistic schedulers can be validated after the fact in time, independent of the level of concurrency. These techniques can also enhance the availability of replicated data, circumventing certain tradeoffs between concurrency and availability imposed by comparable pessimistic techniques.", acknowledgement = ack-nhfb, affiliation = "Carnegie Mellon Univ., Pittsburgh, PA, USA", annote = "new optimistic techniques for objects; exploit type-specific properties of objects to validate interleavings. These techniques reduce the complexity of achieving high levels of concurrency and enhance the availability of replicated data. Deals with hot spots such as counters, account balances, or queues.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Reliability; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "ADT TODS, algorithms; design; reliability; verification", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Abstract data types. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf D.4.5}: Software, OPERATING SYSTEMS, Reliability, Verification. 
{\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems.", } @Article{Wald:1990:EAF, author = "Joseph A. Wald and Paul G. Sorenson", title = "Explaining Ambiguity in a Formal Query Language", journal = j-TODS, volume = "15", number = "2", pages = "125--161", month = jun, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-2/p125-wald/p125-wald.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-2/p125-wald/; http://www.acm.org/pubs/toc/Abstracts/tods/78923.html", abstract = "The problem of generating reasonable natural language-like responses to queries formulated in nonnavigational query languages with logical data independence is addressed. An extended ER model, the Entity-Relationship-Involvement model, is defined which assists in providing a greater degree of logical data independence and the generation of natural language explanations of a query processor's interpretation of a query. These are accomplished with the addition of the concept of an involvement to the model. Based on involvement definitions in a formally defined data definition language, DDL, an innovative strategy for generating explanations is outlined and exemplified. In the conclusion, possible extensions to the approach are given.", acknowledgement = ack-nhfb, affiliation = "Schlumberger Lab. for Comput. Sci., Austin, TX, USA", annote = "sorting out paths in the ER model.", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; languages", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models.", } @Article{Chakravarthy:1990:LBA, author = "Upen S. Chakravarthy and John Grant and Jack Minker", title = "Logic-Based Approach to Semantic Query Optimization", journal = j-TODS, volume = "15", number = "2", pages = "162--207", month = jun, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-2/p162-chakravarthy/p162-chakravarthy.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-2/p162-chakravarthy/; http://www.acm.org/pubs/toc/Abstracts/tods/78924.html", abstract = "The purpose of semantic query optimization is to use semantic knowledge (e.g., integrity constraints) for transforming a query into a form that may be answered more efficiently than the original version. In several previous papers we described and proved the correctness of a method for semantic query optimization in deductive databases couched in first-order logic. This paper consolidates the major results of these papers emphasizing the techniques and their applicability for optimizing relational queries. Additionally, we show how this method subsumes and generalizes earlier work on semantic query optimization. 
We also indicate how semantic query optimization techniques can be extended to databases that support recursion and integrity constraints that contain disjunction, negation, and recursion.", acknowledgement = ack-nhfb, annote = "consolidate results emphasizing the techniques and their applicability for optimizing relational queries; recursion and integrity constraints that contain disjunction, negation, and recursion.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; performance", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Logic programming. {\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf I.2.8}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Problem Solving, Control Methods, and Search, Heuristic methods. {\bf I.2.8}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Problem Solving, Control Methods, and Search, Plan execution, formation, generation.", } @Article{Whang:1990:LTP, author = "Kyu-Young Whang and Brad T. {Vander-Zanden} and Howard M. 
Taylor", title = "A Linear-Time Probabilistic Counting Algorithm for Database Applications", journal = j-TODS, volume = "15", number = "2", pages = "208--229", month = jun, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; Theory/ProbAlgs.bib", note = "A probabilistic technique called linear counting, based on hashing, for counting the number of unique values in the presence of duplicates is presented in this paper.", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-2/p208-whang/p208-whang.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-2/p208-whang/; http://www.acm.org/pubs/toc/Abstracts/tods/78925.html", abstract = "We present a probabilistic algorithm for counting the number of unique values in the presence of duplicates. This algorithm has $O(q)$ time complexity, where $q$ is the number of values including duplicates, and produces an estimation with an arbitrary accuracy prespecified by the user using only a small amount of space. Traditionally, accurate counts of unique values were obtained by sorting, which has $O(q \log q)$ time complexity. Our technique, called {\em linear counting}, is based on hashing. We present a comprehensive theoretical and experimental analysis of linear counting. The analysis reveals an interesting result: A load factor (number of unique values/hash table size) much larger than 1.0 (e.g., 12) can be used for accurate estimation (e.g., 1\% of error). 
We present this technique with two important applications to database problems: namely, (1) obtaining the column cardinality (the number of unique values in a column of a relation) and (2) obtaining the join selectivity (the number of unique values in the join column resulting from an unconditional join divided by the number of unique join column values in the relation to be joined). These two parameters are important statistics that are used in relational query optimization and physical database design.", acknowledgement = ack-nhfb, affiliation = "Korea Adv. Inst. of Sci. and Technol., Seoul, South Korea", annote = "Counting the number of unique values in the presence of duplicates; $ O(n) $ time complexity based on hashing.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Experimentation; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; experimentation; hashing sampling TODS, algorithms; performance; theory", subject = "{\bf G.3}: Mathematics of Computing, PROBABILITY AND STATISTICS, Probabilistic algorithms (including Monte Carlo). {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. 
{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods.", } @Article{Jajodia:1990:DVA, author = "Sushil Jajodia and David Mutchler", title = "Dynamic Voting Algorithms for Maintaining the Consistency of a Replicated Database", journal = j-TODS, volume = "15", number = "2", pages = "230--280", month = jun, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-2/p230-jajodia/p230-jajodia.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-2/p230-jajodia/; http://www.acm.org/pubs/toc/Abstracts/tods/78926.html", abstract = "There are several replica control algorithms for managing replicated files in the face of network partitioning due to site or communication link failures. Pessimistic algorithms ensure consistency at the price of reduced availability; they permit at most one (distinguished) partition to process updates at any given time. The best known pessimistic algorithm, {\em voting}, is a ``static'' algorithm, meaning that all potential distinguished partitions can be listed in advance. We present a dynamic extension of voting called {\em dynamic voting}. This algorithm permits updates in a partition provided it contains more than half of the {\em up-to-date\/} copies of the replicated file. We also present an extension of dynamic voting called {\em dynamic voting with linearly ordered copies\/} (abbreviated as {\em dynamic-linear\/}). These algorithms are dynamic because the order in which past distinguished partitions were created plays a role in the selection of the next distinguished partition. Our algorithms have all the virtues of ordinary voting, including its simplicity, and provide improved availability as well. 
We provide two stochastic models to support the latter claim. In the first (site) model, sites may fail but communication links are infallible; in the second (link) model the reverse is true. We prove that under the site model, dynamic-linear has greater availability than any static algorithm, including weighted voting, if there are four or more sites in the network. In the link model, we consider all biconnected five-site networks and a wide variety of failure and repair rates. In all cases considered, dynamic-linear had greater availability than any static algorithm.", acknowledgement = ack-nhfb, affiliation = "George Mason Univ., Fairfax, VA, USA", annote = "mathematical analysis shows that dynamic-linear is better than static voting algorithms", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; performance; reliability", subject = "{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, Distributed file systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability.", } @Article{Dasgupta:1990:FCC, author = "Partha Dasgupta and Zvi M. 
Kedem", title = "The Five-Color Concurrency Control Protocol: Non-Two-Phase Locking in General Databases", journal = j-TODS, volume = "15", number = "2", pages = "281--307", month = jun, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 156 124", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-2/p281-dasgupta/p281-dasgupta.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-2/p281-dasgupta/; http://www.acm.org/pubs/toc/Abstracts/tods/78927.html", abstract = "Concurrency control protocols based on two-phase locking are a popular family of locking protocols that preserve serializability in general (unstructured) database systems. A concurrency control algorithm (for databases with no inherent structure) is presented that is practical, non two-phase, and allows varieties of serializable logs not possible with any commonly known locking schemes. All transactions are required to predeclare the data they intend to read or write. Using this information, the protocol anticipates the existence (or absence) of possible conflicts and hence can allow non-two-phase locking.\par It is well known that serializability is characterized by acyclicity of the conflict graph representation of interleaved executions. The two-phase locking protocols allow only {\em forward\/} growth of the paths in the graph. The {\em Five Color\/} protocol allows the conflict graph to grow in any direction (avoiding two-phase constraints) and prevents cycles in the graph by maintaining transaction access information in the form of data-item markers. The read and write set information can also be used to provide relative immunity from deadlocks.", acknowledgement = ack-nhfb, affiliation = "Georgia Inst. 
of Technol., Atlanta, GA, USA", annote = "allow varieties of serializable logs not possible with known locking schemes; the protocol anticipates the existence of possible conflicts and hence can allow non-two-phase locking.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; performance; predeclared lock sets TODS", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency.", } @Article{Moore:1990:DTA, author = "James C. Moore and William B. Richmond and Andrew B. Whinston", title = "A Decision-Theoretic Approach to Information Retrieval", journal = j-TODS, volume = "15", number = "3", pages = "311--340", month = sep, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P20", MRnumber = "91h:68037", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-3/p311-moore/p311-moore.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-3/p311-moore/; http://www.acm.org/pubs/toc/Abstracts/tods/88597.html", abstract = "We present the file search problem in a decision-theoretic framework, and discuss a variation of it that we call the common index problem. The goal of the common index problem is to return the best available record in the file, where {\em best\/} is in terms of a class of user preferences. 
We use dynamic programming to construct an optimal algorithm using two different optimality criteria, and we develop sufficient conditions for obtaining complete information.", acknowledgement = ack-nhfb, affiliation = "Purdue Univ., West Lafayette, IN, USA", annote = "searching a file for a best record rather than a specific one; the assumption is that the preferences of the ith individual can be represented as a composite where preferences are based on the same index for all users", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Economics; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; approximate algorithms; design; economics; economics of information; information retrieval; theory", subject = "{\bf E.5}: Data, FILES, Sorting/searching. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Search process.", } @Article{Westland:1990:SOC, author = "J. Christopher Westland", title = "Scaling Up Output Capacity and Performance Results from Information Systems Prototypes", journal = j-TODS, volume = "15", number = "3", pages = "341--358", month = sep, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-3/p341-westland/p341-westland.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-3/p341-westland/; http://www.acm.org/pubs/toc/Abstracts/tods/87943.html", abstract = "The advantage of information system prototyping arises from its ability to predict problems and end-user satisfaction with a system early in the development process, before significant commitments of time and effort have been made. 
Predictions of problems and end-user satisfaction have risen in importance with the increasing complexity of business information systems and the exponential growth of database size. This research investigates the reporting of information to an end user, and the process of inferring from a prototype to a full-scale information system. This inference is called {\em scaling up}, and is an important part of the systems development planning process. The research investigates information systems reporting from a linguistic perspective, where a database is used as a central receptacle for information storage. It then investigates the manner in which reporting statistics from the prototype information system may be used to infer the behavior and performance of the full-scale system. An example is presented for the application of the algorithm, and the final section discusses the usefulness, application, and implications of the algorithm developed in this research.", acknowledgement = ack-nhfb, affiliation = "Univ. of Southern California, Los Angeles, CA, USA", annote = "a coarse introduction, mainly the recall of information retrieval systems; the mathematics is wrong and too simple.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; inclusion-exclusion principle; performance", subject = "{\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval. 
{\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS, Measurement techniques.", } @Article{Alonso:1990:DCI, author = "Rafael Alonso and Daniel Barbara and H{\'e}ctor Garc{\'\i}a-Molina", title = "Data Caching Issues in an Information Retrieval System", journal = j-TODS, volume = "15", number = "3", pages = "359--384", month = sep, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-3/p359-alonso/p359-alonso.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-3/p359-alonso/; http://www.acm.org/pubs/toc/Abstracts/tods/87848.html", abstract = "Currently, a variety of information retrieval systems are available to potential users\ldots{}. While in many cases these systems are accessed from personal computers, typically no advantage is taken of the computing resources of those machines (such as local processing and storage). In this paper we explore the possibility of using the user's local storage capabilities to cache data at the user's site. This would improve the response time of user queries albeit at the cost of incurring the overhead required in maintaining multiple copies. In order to reduce this overhead it may be appropriate to allow copies to diverge in a controlled fashion\ldots{}. Thus, we introduce the notion of quasi-copies, which embodies the ideas sketched above. We also define the types of deviations that seem useful, and discuss the available implementation strategies.", acknowledgement = ack-nhfb, affiliation = "Princeton Univ., NJ, USA", annote = "focus: cache coherency in distributed information retrieval systems one central server site, data cached at client site. Less response time, but more overhead in maintaining multiple copies. 
To reduce overhead, allow copies to diverge in a controlled fashion - notion of ``quasi copies'' and quasi-caching. - users can precisely define limits for divergence of quasi-copies. - reduces update propagation overhead - main difference with materialized views is that here user can establish degree of coherency - implementation possibilities: invalidate/refresh out-of-date data, include automatic expiration date etc. Paper surveys various implementation strategies and their tradeoffs. users give two types of conditions on quasi-caches: selection and coherency. - selection conditions specify which object images will be cached at the user site. - modifiers: - Add/drop (add to cache or remove) - compulsory or advisory (whether caching is to be enforced or to be taken as a hint) - query optimizer can take advantage if caching is compulsory - advisory selection gives greater system flexibility - static / dynamic (static => objects selected once when the condition is issued by a user, dynamic => changes in data cause objects to be added/dropped dynamically). - triggering delay: specifies acceptable delay for dynamic selections - coherency conditions define the allowable deviations between an object and its images. - default: image must have a valid value (though out-of-date) - delay: how much time an image may lag behind an object - version: acceptable lag of how many versions - periodic: image to be refreshed periodically - arithmetic: deviations limited by the difference between the values of the object and its image. - can also have inter-object consistency constraints. implementation issues - transmission delays and failures: ``null'' messages sent out by central site to check if client is alive etc. - what to propagate: - data message: contains new values to overwrite old ones in cache - invalidation message: only identifies invalid object to be purged from the cache, but does not contain new values. 
- version number message: provides new version numbers only - no new data - implicit invalidation: no message from central site, cache images automatically invalidated after a certain time. - when to propagate: - last minute: delayed until a selection/coherency condition is about to be violated. - immediately: as soon as updates occur - delayed update at central site, so no cache conditions are violated. - collapsing conditions: - possible to collapse several coherency conditions on same object into one - load balancing: central site can partially off-load enforcement of consistency to clients describes a probabilistic performance model and simulation results - simulation parameters are network traffic, query processing time, update installation time etc. conclusions: - quasi-caching can potentially improve performance and availability - problems if: - selection and consistency constraints are complex - large number of updates at central site - open issues: - how much data to cache - how does choice of when to propagate updates affect performance etc.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Management; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "cache coherency; data sharing; design; information retrieval systems; management; performance", subject = "{\bf H.3.5}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Online Information Services, Data bank sharing. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf D.4.7}: Software, OPERATING SYSTEMS, Organization and Design, Distributed systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems.", } @Article{Kifer:1990:CTQ, author = "Michael Kifer and Eliezer L. 
Lozinskii", title = "On Compile-Time Query Optimization In Deductive Databases By Means of Static Filtering", journal = j-TODS, volume = "15", number = "3", pages = "385--426", month = sep, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 081 178", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-3/p385-kifer/p385-kifer.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-3/p385-kifer/; http://www.acm.org/pubs/toc/Abstracts/tods/87121.html", abstract = "We extend the query optimization techniques known as algebraic manipulations with relational expressions [48] to work with deductive databases. In particular, we propose a method for moving data-independent selections and projections into recursive axioms, which extends all other known techniques for performing that task [2, 3, 9, 18, 20]. We also show that, in a well-defined sense, our algorithm is optimal among the algorithms that propagate data-independent selections through recursion.", acknowledgement = ack-nhfb, affiliation = "State Univ. of New York, Stony Brook, NY, USA", annote = "Discusses algebraic optimizations for logic programs.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Management; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; dataflow; deductive databases; design; filtering; fixpoint; graph representation; inference; management; performance; projection; recursive rules; selection; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. 
{\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Selection process. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Deduction. {\bf I.2.8}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Problem Solving, Control Methods, and Search, Graph and tree search strategies.", } @Article{Agrawal:1990:DTC, author = "Rakesh Agrawal and Shaul Dar and H. V. Jagadish", title = "Direct Transitive Closure Algorithms: Design and Performance Evaluation", journal = j-TODS, volume = "15", number = "3", pages = "427--458", month = sep, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15 (68Q25 68R05)", MRnumber = "91h:68029", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-3/p427-agrawal/p427-agrawal.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-3/p427-agrawal/; http://www.acm.org/pubs/toc/Abstracts/tods/88888.html", abstract = "We present new algorithms for computing transitive closure of large database relations. Unlike iterative algorithms, such as the seminaive and logarithmic algorithms, the termination of our algorithms does not depend on the length of paths in the underlying graph (hence the name {\em direct\/} algorithms). Besides reachability computations, the proposed algorithms can also be used for solving path problems. We discuss issues related to the efficient implementation of these algorithms, and present experimental results that show the direct algorithms perform uniformly better than the iterative algorithms. 
A side benefit of this work is that we have proposed a new methodology for evaluating the performance of recursive queries.", acknowledgement = ack-nhfb, affiliation = "AT\&T Bell Labs., Murray Hill, NJ, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "deductive databases; design; experimentation; performance; query processing; recursive query processing TODS, algorithms; transitive closure", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems. {\bf I.2.8}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Problem Solving, Control Methods, and Search. {\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems.", } @Article{Zhang:1990:NSC, author = "Weining Zhang and Clement T. Yu and Daniel Troy", title = "Necessary and Sufficient Conditions to Linearize Doubly Recursive Programs in Logic Databases", journal = j-TODS, volume = "15", number = "3", pages = "459--482", month = sep, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15 (68T15)", MRnumber = "91h:68036", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-3/p459-zhang/p459-zhang.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-3/p459-zhang/; http://www.acm.org/pubs/toc/Abstracts/tods/89237.html", abstract = "Linearization of nonlinear recursive programs is an important issue in logic databases for both practical and theoretical reasons. 
If a nonlinear recursive program can be transformed into an equivalent linear recursive program, then it may be computed more efficiently than when the transformation is not possible. We provide a set of necessary and sufficient conditions for a simple doubly recursive program to be equivalent to a simple linear recursive program. The necessary and sufficient conditions can be verified effectively.", acknowledgement = ack-nhfb, acmcrnumber = "9211-0888", affiliation = "Illinois Univ., Chicago, IL, USA", annote = "extends authors' previous results", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "CTYU TODS, algorithms; design; logic database; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Logic programming.", } @Article{Can:1990:CEC, author = "Fazli Can and Esen A. Ozkarahan", title = "Concepts and Effectiveness of the Cover-Coefficient-Based Clustering Methodology for Text Databases", journal = j-TODS, volume = "15", number = "4", pages = "483--517", month = dec, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-4/p483-can/p483-can.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-4/p483-can/; http://www.acm.org/pubs/toc/Abstracts/tods/99938.html", abstract = "A new algorithm for document clustering is introduced. 
The base concept of the algorithm, the cover coefficient (CC) concept, provides a means of estimating the number of clusters within a document database and related indexing and clustering analytically. The CC concept is used also to identify the cluster seeds and to form clusters with these seeds. It is shown that the complexity of the clustering process is very low. The retrieval experiments show that the information-retrieval effectiveness of the algorithm is compatible with a very demanding complete linkage clustering method that is known to have good retrieval performance. The experiments also show that the algorithm is 15.1 to 63.5 (with an average of 47.5) percent better than four other clustering algorithms in cluster-based information retrieval. The experiments have validated the indexing-clustering relationships and the complexity of the algorithm and have shown improvements in retrieval effectiveness. In the experiments two document databases are used: TODS214 and INSPEC. The latter is a common database with 12,684 documents.", acknowledgement = ack-nhfb, affiliation = "Dept. of Syst. Anal., Miami Univ., Oxford, OH, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Experimentation", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "cluster validity; clustering-indexing relationships; cover coefficient; decoupling coefficient; design; document retrieval; experimentation; Inf. retrieval TODS, algorithms; retrieval effectiveness", subject = "{\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Clustering. {\bf H.3.1}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Content Analysis and Indexing, Indexing methods. {\bf H.3.6}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Library Automation, Large text archives. 
{\bf I.7.0}: Computing Methodologies, TEXT PROCESSING, General.", } @Article{Nakano:1990:TOR, author = "Ryohei Nakano", title = "Translation with Optimization from Relational Calculus to Relational Algebra Having Aggregate Functions", journal = j-TODS, volume = "15", number = "4", pages = "518--557", month = dec, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P10", MRnumber = "1 093 243", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-4/p518-nakano/p518-nakano.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-4/p518-nakano/; http://www.acm.org/pubs/toc/Abstracts/tods/99943.html", abstract = "Most of the previous translations of relational calculus to relational algebra aimed at proving that the two languages have the equivalent expressive power, thereby generating very complicated relational algebra expressions, especially when aggregate functions are introduced. This paper presents a rule-based translation method from relational calculus expressions having both aggregate functions and null values to optimized relational algebra expressions. Thus, logical optimization is carried out through translation. The translation method comprises two parts: the translational of the relational calculus kernel and the translation of aggregate functions. The former uses the familiar step-wise rewriting strategy, while the latter adopts a two-phase rewriting strategy via standard aggregate expressions. Each translation proceeds by applying a heuristic rewriting rule in preference to a basic rewriting rule. After introducing SQL-type null values, their impact on the translation is thoroughly investigated, resulting in several extensions of the translation. 
A translation experiment with many queries shows that the proposed translation method generates optimized relational algebra expressions. It is shown that heuristic rewriting rules play an essential role in the optimization. The correctness of the present translation is also shown.\par \ldots{} aggregate expressions. Each translation proceeds by applying a heuristic rewriting rule in preference to a basic rewriting rule. After introducing SQL-type null values, their impact on the translation is thoroughly investigated, resulting in several extensions of the translation. A translation experiment with many queries shows that the proposed translation method generates optimized relational", acknowledgement = ack-nhfb, affiliation = "Knowledge Syst. Lab., NTT Commun. and Inf. Process. Lab., Kanagawa, Japan", annote = "a rule-based translation method from expressions having aggregate functions being a two-phase rewriting strategy; experiment with many queries shows that heuristic rules are essential in optimization; the translation will from the front end of a database machine, MACH, developed by the author.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; languages; theory", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Jagadish:1990:CTM, author = "H. V. 
Jagadish", title = "A Compression Technique to Materialize Transitive Closure", journal = j-TODS, volume = "15", number = "4", pages = "558--598", month = dec, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P10", MRnumber = "1 093 244", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-4/p558-jagadish/p558-jagadish.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-4/p558-jagadish/; http://www.acm.org/pubs/toc/Abstracts/tods/99944.html", abstract = "An important feature of database support for expert systems is the ability of the database to answer queries regarding the existence of a path from one node to another in the directed graph underlying some database relation. Given just the database relation, answering such a query is time-consuming, but given the transitive closure of the database relation a table look-up suffices. We present an indexing scheme that permits the storage of the pre-computed transitive closure of a database relation in a compressed form. The existence of a specified tuple in the closure can be determined from this compressed store by a single look-up followed by an index comparison. We show how to add nodes and arcs to the compressed closure incrementally. We also suggest how this compression technique can be used to reduce the effort required to compute the transitive closure.", acknowledgement = ack-nhfb, affiliation = "AT\&T Bell Lab., Murray Hill, NJ, USA", annote = "an indexing scheme that permits the storage of the pre-computed transitive closure", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "performance; recursive query processing TODS, algorithms", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf I.2.1}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Applications and Expert Systems. {\bf I.2.8}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Problem Solving, Control Methods, and Search.", } @Article{Omiecinski:1990:PAR, author = "Edward Omiecinski and Peter Scheuermann", title = "A Parallel Algorithm for Record Clustering", journal = j-TODS, volume = "15", number = "4", pages = "599--624", month = dec, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P20 (68Q25)", MRnumber = "1 093 245", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-4/p599-omiecinski/p599-omiecinski.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-4/p599-omiecinski/; http://www.acm.org/pubs/toc/Abstracts/tods/99947.html", abstract = "We present an efficient heuristic algorithm for record clustering that can run on a SIMD machine. We introduce the P-tree, and its associated numbering scheme, which in the split phase allows each processor independently to compute the unique cluster number of a record satisfying an arbitrary query. We show that by restricting ourselves in the merge phase to combining only sibling clusters, we obtain a parallel algorithm whose speedup ratio is optimal in the number of processors used. 
Finally, we report on experiments showing that our method produces substantial savings in an environment with relatively little overlap among the queries.", acknowledgement = ack-nhfb, affiliation = "Sch. of Inf. and Comput. Sci., Georgia Inst. of Technol., Atlanta, GA, USA", annote = "for SIMD machine", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; experimentation; performance; Physical database design TODS, algorithms", subject = "{\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Clustering. {\bf C.1.2}: Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Single-instruction-stream, multiple-data-stream processors (SIMD). {\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems, Sequencing and scheduling. {\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design.", } @Article{Lomet:1990:HTM, author = "David B. 
Lomet and Betty Salzberg", title = "The {hB-Tree}: a Multiattribute Indexing Method with Good Guaranteed Performance", journal = j-TODS, volume = "15", number = "4", pages = "625--658", month = dec, year = "1990", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1990-15-4/p625-lomet/p625-lomet.pdf; http://www.acm.org/pubs/citations/journals/tods/1990-15-4/p625-lomet/; http://www.acm.org/pubs/toc/Abstracts/tods/99949.html", abstract = "A new multiattribute index structure called the hB-tree is introduced. It is derived from the K-D-B-tree of Robinson [15] but has additional desirable properties. The hB-tree internode search and growth processes are precisely analogous to the corresponding processes in B-trees [1]. The intranode processes are unique. A k-d tree is used as the structure within nodes for very efficient searching. Node splitting requires that this k-d tree be split. This produces nodes which no longer represent brick-like regions in k-space, but that can be characterized as holey bricks, bricks in which subregions have been extracted. We present results that guarantee hB-tree users decent storage utilization, reasonable size index terms, and good search and insert performance. These results guarantee that the hB-tree copes well with arbitrary distributions of keys.", acknowledgement = ack-nhfb, annote = "multiattributable index; node splitting produces nodes that can be characterized as holey bricks", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; multi dimensional range queries TODS; performance", subject = "{\bf E.1}: Data, DATA STRUCTURES, Trees. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization.", } @Article{Abiteboul:1991:RBL, author = "Serge Abiteboul and St{\'e}phane Grumbach", title = "A Rule-Based Language with Functions and Sets", journal = j-TODS, volume = "16", number = "1", pages = "1--30", month = mar, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68Q45 (68N17 68P15)", MRnumber = "92a:68067", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-1/p1-abiteboul/p1-abiteboul.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-1/p1-abiteboul/; http://www.acm.org/pubs/toc/Abstracts/tods/103141.html", abstract = "A logic based language for manipulating complex objects constructed using set and tuple constructors is introduced. A key feature of the COL language is the use of base and derived data functions. Under some stratification restrictions, the semantics of programs is given by a minimal and justified model that can be computed using a finite sequence of fixpoints. The language is extended using external functions and predicates. An implementation of COL in a functional language is briefly discussed.", acknowledgement = ack-nhfb, affiliation = "INRIA, Le Chesnay, France", annote = "the COL language uses base and derived data functions", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "complex objects; deductive databases; deductive knowledge TODS, design; fixpoint semantics; knowledge bases; languages; object-oriented databases; rule based; theory", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving. {\bf F.4.1}: Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Mathematical Logic, Logic programming.", } @Article{Mendelzon:1991:FDH, author = "Alberto O. Mendelzon and Peter T. Wood", title = "Functional dependencies in {Horn} clause queries", journal = j-TODS, volume = "16", number = "1", pages = "31--55", month = mar, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P20 (68N17)", MRnumber = "92b:68028", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-1/p31-mendelzon/p31-mendelzon.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-1/p31-mendelzon/; http://www.acm.org/pubs/toc/Abstracts/tods/103142.html", abstract = "When a database query is expressed as a set of Horn clauses whose execution is by top-down resolution of goals, there is a need to improve the backtracking behavior of the interpreter. Rather than putting on the programmer the onus of using extra-logical operators such as {\em cut\/} to improve performance, we show that some uses of the cut can be automated by inferring them from functional dependencies. 
This requires some knowledge of which variables are guaranteed to be bound at query execution time; we give a method for deriving such information using data flow analysis.", acknowledgement = ack-nhfb, affiliation = "Comput. Syst. Res. Inst., Toronto Univ., Ont., Canada", annote = "some uses of the cut can be automated by inferring them; this requires knowledge of which variables are bound at execution time", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data flow analysis; design; functional dependency; logic programming; performance; relational database; theory; theory deductive knowledge TODS, algorithms", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Deduction. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Logic programming.", } @Article{Meghini:1991:COF, author = "C. Meghini and C. Thanos", title = "The Complexity of Operations on a Fragmented Relation", journal = j-TODS, volume = "16", number = "1", pages = "56--87", month = mar, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15 (68Q25)", MRnumber = "92g:68036", MRreviewer = "K. 
Marguerite Hafen", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-1/p56-meghini/p56-meghini.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-1/p56-meghini/; http://www.acm.org/pubs/toc/Abstracts/tods/103143.html", abstract = "Data fragmentation is an important aspect of distributed database design, in which portions of relations, tailored to the specific needs of local applications, are defined to be further allocated to the sites of the computer network supporting the database system. In this paper we present a theory of fragmentation with overlapping fragments to study the complexity of the problems involved in checking the completeness of a fragmentation schema and in querying and updating a fragmented relation. We analyze these problems from the complexity viewpoint and present sound and complete algorithms for their solution.", acknowledgement = ack-nhfb, affiliation = "Istituto di Elaborazione della Inf., CNR, Pisa, Italy", annote = "Proposes a two-step process to reconstruct first minimal horizontal, then vertical covers. When fragments overlap, optimization becomes intractible.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; completeness of fragmentation schemas; design; NP-hardness; performance; query optimization; relation fragmentation; theory; updates; verification", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. 
{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf F.1.3}: Theory of Computation, COMPUTATION BY ABSTRACT DEVICES, Complexity Classes.", } @Article{Rabitti:1991:MAN, author = "Fausto Rabitti and Elisa Bertino and Won Kim and Darrell Woelk", title = "A Model of Authorization for Next-Generation Database Systems", journal = j-TODS, volume = "16", number = "1", pages = "88--131", month = mar, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-1/p88-rabitti/p88-rabitti.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-1/p88-rabitti/; http://www.acm.org/pubs/toc/Abstracts/tods/103144.html", abstract = "The conventional models of authorization have been designed for database systems supporting the hierarchical, network, and relational models of data. However, these models are not adequate for next-generation database systems that support richer data models that include object-oriented concepts and semantic data modeling concepts. Rabitti, Woelk, and Kim [14] presented a preliminary model of authorization for use as the basis of an authorization mechanism in such database systems. In this paper we present a fuller model of authorization that fills a few major gaps that the conventional models of authorization cannot fill for next-generation database systems. We also further formalize the notion of implicit authorization and refine the application of the notion of implicit authorization to object-oriented and semantic modeling concepts. 
We also describe a user interface for using the model of authorization and consider key issues in implementing the authorization model.", acknowledgement = ack-nhfb, affiliation = "Microelectronics and Comput. Technol. Corp., CNR, Pisa, Italy", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Security; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "object-oriented database; security; security access control TODS, design; semantic database; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf D.1.5}: Software, PROGRAMMING TECHNIQUES, Object-oriented Programming. {\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection.", } @Article{Weikum:1991:PRS, author = "Gerhard Weikum", title = "Principles and Realization Strategies of Multilevel Transaction Management", journal = j-TODS, volume = "16", number = "1", pages = "132--180", month = mar, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-1/p132-weikum/p132-weikum.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-1/p132-weikum/; http://www.acm.org/pubs/toc/Abstracts/tods/103145.html", abstract = "One of the demands of database system transaction management is to achieve a high degree of concurrency by taking into consideration the semantics of high-level operations. On the other hand, the implementation of such operations must pay attention to conflicts on the storage representation levels below. 
To meet these requirements in a layered architecture, we propose a multilevel transaction management utilizing layer-specific semantics. Based on the theoretical notion of multilevel serializability, a family of concurrency control strategies is developed. Suitable recovery protocols are investigated for aborting single transactions and for restarting the system after a crash. The choice of levels involved in a multilevel transaction strategy reveals an inherent trade-off between increased concurrency and growing recovery costs. A series of measurements has been performed in order to compare several strategies. Preliminary results indicate considerable performance gains of the multilevel transaction approach.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., ETH Zurich, Switzerland", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Management; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "atomicity persistence concurrency control; concurrency control recovery atomicity nested transactions TODS, algorithms; design; management; multilevel transactions; performance; persistence; reliability; serializability", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf H.2.7}: Information Systems, DATABASE MANAGEMENT, Database Administration, Logging and recovery. 
{\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Synchronization.", } @Article{Wolfson:1991:MPR, author = "Ouri Wolfson and Amir Milo", title = "The Multicast Policy and its Relationship to Replicated Data Placement", journal = j-TODS, volume = "16", number = "1", pages = "181--205", month = mar, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15 (68M10)", MRnumber = "92a:68042", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See corrigendum in \cite{Wolfson:1991:CMP}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-1/p181-wolfson/p181-wolfson.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-1/p181-wolfson/; http://www.acm.org/pubs/toc/Abstracts/tods/103146.html", abstract = "In this paper we consider the communication complexity of maintaining the replicas of a logical data-item, in a database distributed over a computer network. We propose a new method, called the minimum spanning tree write, by which a processor in the network should multicast a write of a logical data-item, to all the processors that store replicas of the items. Then we show that the minimum spanning tree write is optimal from the communication cost point of view. We also demonstrate that the method by which a write is multicast to all the replicas of a data-item affects the optimal replication scheme of the item, i.e., at which processors in the network the replicas should be located. Therefore, next we consider the problem of determining an optimal replication scheme for a data item, assuming that each processor employs the minimum spanning tree write at run-time. The problem for general networks is shown NP-Complete, but we provide efficient algorithms to obtain an optimal allocation scheme for three common types of network topologies. 
They are completely-connected, tree, and ring networks. For these topologies, efficient algorithms are also provided for the case in which reliability considerations dictate a minimum number of replicas.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Columbia Univ., New York, NY, USA", annote = "mimumun spanning tree write and multicast to store replicas", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "181 TODS, algorithms; complexity; computer network; file allocation; message passing; NP-Complete; performance; theory", subject = "{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS. {\bf C.2.1}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems.", } @Article{Casanova:1991:STM, author = "M. A. Casanova and A. L. Furtado and L. Tucherman", title = "A Software Tool for Modular Database Design", journal = j-TODS, volume = "16", number = "2", pages = "209--234", month = jun, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-2/p209-casanova/p209-casanova.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-2/p209-casanova/; http://www.acm.org/pubs/toc/Abstracts/tods/103711.html", abstract = "A modularization discipline for database schemas is first described. 
The discipline incorporates both a strategy for enforcing integrity constraints and a tactic for organizing large sets of database structures, integrity constraints, and operations. A software tool that helps the development and maintenance of database schemas modularized according to the discipline is then presented. It offers a user-friendly interface that guides the designer through the various stages of the creation of a new module or through the process of changing objects of existing modules. The tool incorporates, in a declarative style, a description of the design and redesign rules behind the modularization discipline, hence facilitating the incremental addition of new expertise about database design.", acknowledgement = ack-nhfb, affiliation = "Rio Sci. Center, IBM Brazil, Rio de Janeiro, Brazil", annote = "first modularize, then use the tool to develop and maintain schemas; functions declared to enforce constraints", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "abstract data types; consistency preservation; design; encapsulation; integrity constraints; languages; logical database design; modular design; module constructors", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf D.2.2}: Software, SOFTWARE ENGINEERING, Tools and Techniques. {\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Modules, packages. {\bf H.2.7}: Information Systems, DATABASE MANAGEMENT, Database Administration.", } @Article{VanGelder:1991:STR, author = "Allen {Van Gelder} and Rodney W. 
Topor", title = "Safety and Translation of Relational Calculus Queries", journal = j-TODS, volume = "16", number = "2", pages = "235--278", month = jun, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P20", MRnumber = "92c:68037", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-2/p235-van_gelder/p235-van_gelder.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-2/p235-van_gelder/; http://www.acm.org/pubs/toc/Abstracts/tods/103712.html", abstract = "Not all queries in relational calculus can be answered sensibly when disjunction, negation, and universal quantification are allowed. The class of relation calculus queries or formulas that have sensible answers is called the {\em domain independent\/} class which is known to be undecidable. Subsequent research has focused on identifying large decidable subclasses of domain independent formulas. In this paper we investigate the properties of two such classes: the {\em evaluable\/} formulas and the {\em allowed\/} formulas. Although both classes have been defined before, we give simplified definitions, present short proofs of their main properties, and describe a method to incorporate equality.\par Although evaluable queries have sensible answers, it is not straightforward to compute them efficiently or correctly. We introduce {\em relational algebra normal form\/} for formulas from which form the correct translation into relational algebra is trivial. We give algorithms to transform an evaluable formula into an equivalent {\em allowed\/} formula and from there into relational algebra normal form. 
Our algorithms avoid use of the so-called {\em Dom\/} relation, consisting of all constants appearing in the database or the query.\par Finally, we describe a restriction under which every domain independent formula is evaluable and argue that the class of evaluable formulas is the largest decidable subclass of the domain independent formulas that can be efficiently recognized.", acknowledgement = ack-nhfb, affiliation = "California Univ., Santa Cruz, CA, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "allowed formulas; domain independence; evaluable formulas; existential normal; query translation; relational algebra; relational calculus", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf F.4.1}: Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Mathematical Logic, Model theory.", } @Article{Shasha:1991:OEQ, author = "Dennis Shasha and Tsong-Li L. 
Wang", title = "Optimizing Equijoin Queries in Distributed Databases where Relations are Hash-Partitioned", journal = j-TODS, volume = "16", number = "2", pages = "279--308", day = "1", month = jun, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P20", MRnumber = "92c:68036", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; UnCover library database", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-2/p279-shasha/p279-shasha.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-2/p279-shasha/; http://www.acm.org/pubs/toc/Abstracts/tods/103713.html", abstract = "Consider the class of distributed database systems consisting of a set of nodes connected by a high bandwidth network. Each node consists of a processor, a random access memory, and a slower but much larger memory such as a disk. There is no shared memory among the nodes. The data are horizontally partitioned often using a hash function. Such a description characterizes many parallel or distributed database systems that have recently been proposed, both commercial and academic. We study the optimization problem that arises when the query processor must repartition the relations and intermediate results participating in a multijoin query. Using estimates of the sizes of intermediate relations, we show (1) optimum solutions for closed chain queries; (2) the NP-completeness of the optimization problem for star, tree, and general graph queries; and (3) effective heuristics for these hard cases.\par Our general approach and many of our results extend to other attribute partitioning schemes, for example, sort-partitioning on attributes, and to partitioned object databases.", acknowledgement = ack-nhfb, affiliation = "Courant Inst. of Math. 
Sci., New York Univ., NY, USA", annote = "No shared memory so that processor must repartition the relations and intermediate results in a multijoin query for lost hashkeys (not dynamic optimization); optimum solutions for closed chain queries, NP-completeness of star, tree, and general graph queries and effective heuristics.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; equijoin; hashing; NP-complete problems; performance; relational data models; spanning trees; systems; theory", subject = "{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS.", } @Article{Cesarini:1991:DHM, author = "F. Cesarini and G. Soda", title = "A Dynamic Hash Method with Signature", journal = j-TODS, volume = "16", number = "2", pages = "309--337", day = "1", month = jun, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib; UnCover library database", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-2/p309-cesarini/p309-cesarini.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-2/p309-cesarini/; http://www.acm.org/pubs/toc/Abstracts/tods/103714.html", abstract = "We present a dynamic external hash method that allows retrieval of a record by only one access to mass storage while maintaining a high load factor. The hash function is based on generalized spiral storage. 
Both primary and overflow records are allocated to the same file, and file expansion depends on being able to allocate every overflow chain to one bucket. An in-core index, built by means of a signature function, discriminates between primary and overflow records and assures one access to storage in the case of either successful or unsuccessful searching. Simulation results confirm the good expected performance.", acknowledgement = ack-nhfb, affiliation = "Dipartimento di Sistemi e Inf., Florence Univ., Italy", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; dynamic hashing; external hashing; generalized spiral storage; performance; signature functions", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Retrieval models. {\bf E.2}: Data, DATA STORAGE REPRESENTATIONS, Hash-table representations. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval. {\bf E.5}: Data, FILES.", } @Article{King:1991:MRB, author = "Richard P. King and Nagui Halim and H{\'e}ctor Garc{\'\i}a-Molina and Christos A. 
Polyzois", title = "Management of a Remote Backup Copy for Disaster Recovery", journal = j-TODS, volume = "16", number = "2", pages = "338--368", month = jun, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-2/p338-king/p338-king.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-2/p338-king/; http://www.acm.org/pubs/toc/Abstracts/tods/103715.html", abstract = "A remote backup database system tracks the state of a primary system, taking over transaction processing when disaster hits the primary site. The primary and backup sites are physically isolated so that failures at one site are unlikely to propagate to the other. For correctness, the execution schedule at the backup must be equivalent to that at the primary. When the primary and backup sites contain a single processor, it is easy to achieve this property. However, this is harder to do when each site contains multiple processors and sites are connected via multiple communication lines. We present an efficient transaction processing mechanism for multiprocessor systems that guarantees this and other important properties. We also present a database initialization algorithm that copies the database to a backup site while transactions are being processed.", acknowledgement = ack-nhfb, affiliation = "IBM T.J. Watson Research Center, Yorktown Heights, NY, USA", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; database initialization; hot spare; hot standby; reliability; remote backup", subject = "{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf D.4.5}: Software, OPERATING SYSTEMS, Reliability, Backup procedures. {\bf H.2.7}: Information Systems, DATABASE MANAGEMENT, Database Administration, Logging and recovery.", } @Article{Gogolla:1991:TSV, author = "Martin Gogolla and Uwe Hohenstein", title = "Towards a Semantic View of an Extended Entity-Relationship Model", journal = j-TODS, volume = "16", number = "3", pages = "369--416", month = sep, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68Q55 (68P15 68Q60)", MRnumber = "1 131 140", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; Distributed/gesturing.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-3/p369-gogolla/p369-gogolla.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-3/p369-gogolla/; http://www.acm.org/pubs/toc/Abstracts/tods/111200.html", abstract = "Nearly all query languages discussed recently for the Entity-Relationship (ER) model do not possess a formal semantics. Languages are often defined by means of examples only. The reason for this phenomenon is the essential gap between features of query languages and theoretical foundations like algebras and calculi. 
Known languages offer arithmetic capabilities and allow for aggregates, but algebras and calculi defined for ER models do not.\par This paper introduces an extended ER model concentrating nearly all concepts of known so-called semantic data models in a few syntactical constructs. Moreover, we provide our extended ER model with a formal mathematical semantics. On this basis a well-founded calculus is developed taking into account data operations on arbitrary user-defined data types and aggregate functions. We pay special attention to arithmetic operations, as well as multivalued terms allowing nested queries, in a uniform and consistent manner. We prove our calculus only allows the formulation of safe terms and queries yielding a finite result, and to be (at least) as expressive as the relational calculi.", acknowledgement = ack-nhfb, affiliation = "Tech. Univ. Braunschweig, Germany", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "abstract data type; aggregate function; calculus; design; entity-relationship model; formal semantics; languages; relational completeness; safeness; semantic data model; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf F.3.1}: Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs, Specification techniques. {\bf F.3.2}: Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Semantics of Programming Languages. {\bf D.3.1}: Software, PROGRAMMING LANGUAGES, Formal Definitions and Theory, Semantics. 
{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages.", } @Article{Sciore:1991:UAS, author = "Edward Sciore", title = "Using Annotations to Support Multiple Kinds of Versioning in an Object-Oriented Database System", journal = j-TODS, volume = "16", number = "3", pages = "417--438", month = sep, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-3/p417-sciore/p417-sciore.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-3/p417-sciore/; http://www.acm.org/pubs/toc/Abstracts/tods/111205.html", abstract = "The concept of {\em annotation\/} from object-oriented languages is adapted to object-oriented databases. It is shown how annotations can be used to model activities such as constraint checking, default values, and triggers. Annotations also are an appropriate way to model different versioning concepts. This paper discusses three kinds of versioning---histories, revisions, and alternatives---and demonstrates how each one can be modeled effectively using annotations. The use of annotations also allows other kinds of versioning to be defined extensibly, and arbitrary combinations of versions can be handled easily.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Boston Coll., Chestnut Hill, MA, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "configuration management; design; languages; object-oriented databases; versions", subject = "{\bf D.3.2}: Software, PROGRAMMING LANGUAGES, Language Classifications, Object-oriented languages. 
{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf D.1.5}: Software, PROGRAMMING TECHNIQUES, Object-oriented Programming.", } @Article{Karabeg:1991:SRC, author = "Dino Karabeg and Victor Vianu", title = "Simplification Rules and Complete Axiomatization for Relational Update Transactions", journal = j-TODS, volume = "16", number = "3", pages = "439--475", month = sep, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15 (68P20)", MRnumber = "92g:68033", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-3/p439-karabeg/p439-karabeg.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-3/p439-karabeg/; http://www.acm.org/pubs/toc/Abstracts/tods/111208.html", abstract = "Relational update transactions consisting of line programs of inserts, deletes, and modifications are studied with respect to equivalence and simplification. A sound and complete set of axioms for proving transaction equivalence is exhibited. The axioms yield a set of simplification rules that can be used to optimize efficiently a large class of transactions of practical interest. The simplification rules are particularly well suited to a dynamic environment where transactions are presented in an on-line fashion, and where the time available for optimization may consist of arbitrarily short and sparse intervals.", acknowledgement = ack-nhfb, affiliation = "California Univ., San Diego, La Jolla, CA, USA", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; performance; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design. {\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems. {\bf G.2.2}: Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Graph algorithms.", } @Article{Yu:1991:RTR, author = "Philip S. Yu and Avraham Leff and Yann-Hang Lee", title = "On Robust Transaction Routing and Load Sharing", journal = j-TODS, volume = "16", number = "3", pages = "476--512", month = sep, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-3/p476-yu/p476-yu.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-3/p476-yu/; http://www.acm.org/pubs/toc/Abstracts/tods/111210.html", abstract = "In this paper we examine the issue of robust transaction routing in a locally distributed database environment where transaction characteristics such as reference locality imply that certain processing systems can be identified as being more suitable than others for a given transaction class. A response time based routing strategy can strike a balance between indiscriminate sharing of the load and routing based only on transaction affinity. Since response time estimates depend on workload and system parameters that may not be readily available, it is important to examine the robustness of routing decisions to information accuracy. 
We find that a strategy which strictly tries to minimize the response time of incoming transactions is sensitive to the accuracy of certain parameter values. On the other hand, naive strategies, that simply ignore the parameters in making routing decisions, have even worse performance. Three alternative strategies are therefore examined: threshold, discriminatory, and adaptive. Instead of just optimizing an incoming transaction's response time, the first two strategies pursue a strategy that is somewhat more oriented towards global optimization. This is achieved by being more restrictive on either the condition or the candidate for balancing the load. The third strategy, while trying to minimize the response time of individual incoming transactions, employs a feedback process to adaptively adjust future response time estimates. It monitors the discrepancy between the actual and estimated response times and introduces a correction factor based on regression analysis. All three strategies are shown to be robust with respect to the accuracy of workload and system parameters used in the response time estimation.", acknowledgement = ack-nhfb, affiliation = "IBM Thomas J. Watson Res. Center, Yorktown Heights, NY, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; distributed database; load balancing; performance analysis; performance, PSYU TODS; transaction routing", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems, Routing and layout. {\bf D.4.8}: Software, OPERATING SYSTEMS, Performance. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management. 
{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems.", } @Article{Negri:1991:FSS, author = "M. Negri and G. Pelagatti and L. Sbattella", title = "Formal Semantics of {SQL} Queries", journal = j-TODS, volume = "16", number = "3", pages = "513--534", month = sep, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P20 (03B50 03B70)", MRnumber = "92i:68033", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-3/p513-negri/p513-negri.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-3/p513-negri/; http://www.acm.org/pubs/toc/Abstracts/tods/111212.html", abstract = "The semantics of SQL queries is formally defined by stating a set of rules that determine a syntax-driven translation of an SQL query to a formal model. The target model, called Extended Three Valued Predicate Calculus (E3VPC), is largely based on a set of well-known mathematical concepts. The rules which allow the transformation of a general E3VPC expression to a Canonical Form, which can be manipulated using traditional, two-valued predicate calculus are also given; in this way, problems like equivalence analysis of SQL queries are completely solved. Finally, the fact that reasoning about the equivalence of SQL queries using two-valued predicate calculus, without taking care of the real SQL semantics can lead to errors is shown, and the reasons for this are analyzed.", acknowledgement = ack-nhfb, affiliation = "Brescia Univ., Italy", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; languages; query equivalence; query semantics; SQL; theory", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, SQL. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Query formulation. {\bf F.3.2}: Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Semantics of Programming Languages.", } @Article{Roussopoulos:1991:IAM, author = "Nicholas Roussopoulos", title = "An Incremental Access Method for {ViewCache}: Concept, Algorithms, and Cost Analysis", journal = j-TODS, volume = "16", number = "3", pages = "535--563", month = sep, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-3/p535-roussopoulos/p535-roussopoulos.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-3/p535-roussopoulos/; http://www.acm.org/pubs/toc/Abstracts/tods/111215.html", abstract = "A {\em ViewCache\/} is a stored collection of pointers pointing to records of underlying relations needed to materialize a view. This paper presents an {\em Incremental Access Method (IAM)\/} that amortizes the maintenance cost of ViewCaches over a long time period or indefinitely. Amortization is based on {\em deferred\/} and other update propagation strategies. A deferred update strategy allows a ViewCache to remain outdated until a query needs to selectively or exhaustively materialize the view. 
At that point, an incremental update of the ViewCache is performed. This paper defines a set of conditions under which incremental access to the ViewCache is cost effective. The decision criteria are based on some dynamically maintained cost parameters, which provide accurate information but require inexpensive bookkeeping.\par The IAM capitalizes on the ViewCache storage organization for performing the update and the materialization of the ViewCaches in an interleaved mode using one-pass algorithms. Compared to the standard technique for supporting views that requires reexecution of the definition of the view, the IAM offers significant performance advantages. We will show that under favorable conditions, most of which depend on the size of the incremental update logs between consecutive accesses of the views, the incremental access method outperforms query modification. Performance gains are higher for multilevel ViewCaches because all the I/O and CPU for handling intermediate results are avoided.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Maryland Univ., College Park, MD, USA", annote = "replicated data management", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; performance; Relational Precomputation TODS, algorithms; terms", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. 
{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages.", } @Article{Mukkamala:1991:NEC, author = "Ravi Mukkamala and Sushil Jajodia", title = "A Note on Estimating the Cardinality of the Projection of a Database Relation", journal = j-TODS, volume = "16", number = "3", pages = "564--566", month = sep, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-3/p564-mukkamala/p564-mukkamala.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-3/p564-mukkamala/; http://www.acm.org/pubs/toc/Abstracts/tods/111218.html", abstract = "The paper by Ahad et al. [1] derives an analytical expression to estimate the cardinality of the projection of a database relation. In this note, we propose to show that this expression is in error even when all the parameters are assumed to be constant. We derive the correct formula for this expression.", acknowledgement = ack-nhfb, affiliation = "Old Dominion Univ., Norfolk, VA, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "block access estimation; design; performance; query cost-estimation; relational databases", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design.", } @Article{Wolfson:1991:CMP, author = "O. Wolfson and A. Milo", title = "Corrigendum: {``The Multicast Policy and its Relationship to Replicated Data Placement'' [ACM Trans. Database Systems {\bf 16} (1991), no. 1, 181--205, by O. Wolfson and A. 
Milo] (MR 92a:68042)}", journal = j-TODS, volume = "16", number = "3", pages = "567--567", month = sep, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15 (68M10)", MRnumber = "1 131 143", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See \cite{Wolfson:1991:MPR}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Hernandez:1991:CTM, author = "H{\'e}ctor J. Hern{\'a}ndez and Edward P. F. Chan", title = "Constant-Time-Maintainable {BCNF} Database Schemes", journal = j-TODS, volume = "16", number = "4", pages = "571--599", month = dec, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-4/p571-hernandez/p571-hernandez.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-4/p571-hernandez/; http://www.acm.org/pubs/toc/Abstracts/tods/115301.html", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., New Mexico State Univ., Las Cruces, NM, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; boundedness; constraint enforcement; data dependencies; design; query processing; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. 
{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Hou:1991:SEA, author = "Wen-Chi Hou and Gultekin {\"O}zsoyo{\u{g}}lu", title = "Statistical Estimators for Aggregate Relational Algebra Queries", journal = j-TODS, volume = "16", number = "4", pages = "600--654", month = dec, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-4/p600-hou/p600-hou.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-4/p600-hou/; http://www.acm.org/pubs/toc/Abstracts/tods/115300.html", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. and Eng. Sci., Case Western Reserve Univ., Cleveland, OH, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Management; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; management; performance; relational algebra; sampling; selectivity; simple random sampling; statistical estimators; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf G.3}: Mathematics of Computing, PROBABILITY AND STATISTICS, Statistical computing. {\bf G.2.m}: Mathematics of Computing, DISCRETE MATHEMATICS, Miscellaneous. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages.", } @Article{Negri:1991:DJN, author = "M. Negri and G. 
Pelagatti", title = "Distributive Join: a New Algorithm for Joining Relations", journal = j-TODS, volume = "16", number = "4", pages = "655--669", month = dec, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-4/p655-negri/p655-negri.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-4/p655-negri/; http://www.acm.org/pubs/toc/Abstracts/tods/115299.html", abstract = "This paper describes a new algorithm for performing joins in the absence of access paths. This algorithm is shown to perform better than the merging scan algorithm, which can be considered the paradigm of join algorithms. Finally this algorithm is compared with another recent sub-sort-merge algorithm.", acknowledgement = ack-nhfb, affiliation = "Politecnico di Milano, Italy", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Algorithm, Performance, buffer, hashing, join, merging scan, nested scan, sort, algorithms; buffer; design; experimentation; hashing; join; merging scan; nested scan; performance; sort", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems, Sorting and searching. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods.", } @Article{Moerkotte:1991:RCC, author = "Guido Moerkotte and Peter C. 
Lockemann", title = "Reactive Consistency Control in Deductive Databases", journal = j-TODS, volume = "16", number = "4", pages = "670--702", month = dec, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-4/p670-moerkotte/p670-moerkotte.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-4/p670-moerkotte/; http://www.acm.org/pubs/toc/Abstracts/tods/115298.html", acknowledgement = ack-nhfb, affiliation = "Inst. fuer Programmstrukturen und Datenorganisation, Karlsruhe Univ., Germany", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Experimentation; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; experimentation; performance; theory", subject = "{\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data description languages (DDL). {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Deduction.", } @Article{Carey:1991:CDT, author = "Michael J. 
Carey and Miron Livny", title = "Conflict Detection Tradeoffs for Replicated Data", journal = j-TODS, volume = "16", number = "4", pages = "703--746", month = dec, year = "1991", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1991-16-4/p703-carey/p703-carey.pdf; http://www.acm.org/pubs/citations/journals/tods/1991-16-4/p703-carey/; http://www.acm.org/pubs/toc/Abstracts/tods/115289.html", acknowledgement = ack-nhfb, affiliation = "Wisconsin Univ., Madison, WI, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Experimentation; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; concurrency control; experimentation; measurement; performance; replicated data", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf D.4.8}: Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf D.4.8}: Software, OPERATING SYSTEMS, Performance, Simulation. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases.", } @Article{Cattell:1992:OOB, author = "R. G. G. Cattell and J. 
Skeen", title = "Object Operations Benchmark", journal = j-TODS, volume = "17", number = "1", pages = "1--31", month = mar, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-1/p1-cattell/p1-cattell.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-1/p1-cattell/; http://www.acm.org/pubs/toc/Abstracts/tods/128766.html", abstract = "Performance is a major issue in the acceptance of object-oriented and relational database systems aimed at engineering applications such as computer-aided software engineering (CASE) and computer-aided design (CAD). Because traditional database systems benchmarks are inappropriate to measure performance for operations on engineering objects, we designed a new benchmark Object Operations version 1 (OO1) to focus on important characteristics of these applications. OO1 is descended from an earlier benchmark for simple database operations and is based on several years experience with that benchmark. In this paper we describe the OO1 benchmark and results we obtained running it on a variety of database systems. We provide a careful specification of the benchmark, show how it can be implemented on database systems, and present evidence that more than an order of magnitude difference in performance can result from a DBMS implementation quite different from current products; minimizing overhead per database call, offloading database server functionality to workstations, taking advantage of large main memories, and using link-based methods.", acknowledgement = ack-nhfb, affiliation = "Sun Microsyst., Mountain View, CA, USA", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Experimentation; Languages; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; CAD; CASE; client-server architecture; design; engineering database benchmark; experimentation; hypermodel; languages; measurement; object operations benchmark; object-oriented DBMS's; performance; relation of DBMS's; workstations", subject = "{\bf K.6.2}: Computing Milieux, MANAGEMENT OF COMPUTING AND INFORMATION SYSTEMS, Installation Management, Benchmarks. {\bf D.1.5}: Software, PROGRAMMING TECHNIQUES, Object-oriented Programming. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data description languages (DDL). {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML). {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Database (persistent) programming languages. {\bf H.2.8}: Information Systems, DATABASE MANAGEMENT, Database applications. {\bf K.6.2}: Computing Milieux, MANAGEMENT OF COMPUTING AND INFORMATION SYSTEMS, Installation Management, Performance and usage measurement.", } @Article{Weddell:1992:RAF, author = "Grant E. 
Weddell", title = "Reasoning About Functional Dependencies Generalized for Semantic Data Models", journal = j-TODS, volume = "17", number = "1", pages = "32--64", month = mar, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15 (68Q55)", MRnumber = "1 161 053", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-1/p32-weddell/p32-weddell.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-1/p32-weddell/; http://www.acm.org/pubs/toc/Abstracts/tods/128767.html", abstract = "We propose a more general form of functional dependency for semantic data models that derives from their common feature in which the separate notions of {\em domain\/} and {\em relation\/} in the relational model are combined into a single notion of {\em class}. This usually results in a richer terminological component for their query languages, whereby terms may navigate through any number of properties, including none. We prove the richer expressiveness of this more general functional dependency, and exhibit a sound and complete set of inference axioms. Although the general problem of decidability of their logical implication remains open at this time, we present decision procedures for cases in which the dependencies included in a schema correspond to keys, or in which the schema itself is acyclic. The theory is then extended to include a form of conjunctive query. Of particular significance is that the query becomes an additional source of functional dependency. Finally, we outline several applications of the theory to various problems in physical design and in query optimization. 
The applications derive from an ability to predict when a query can have at most one solution.", acknowledgement = ack-nhfb, affiliation = "Waterloo Univ., Ont., Canada", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; constraint theory; functional dependencies; query optimization; semantic data models; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Paredaens:1992:CNA, author = "Jan Paredaens and Dirk {Van Gucht}", title = "Converting Nested Algebra Expressions into Flat Algebra Expressions", journal = j-TODS, volume = "17", number = "1", pages = "65--93", month = mar, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P10", MRnumber = "93c:68018", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-1/p65-paredaens/p65-paredaens.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-1/p65-paredaens/; http://www.acm.org/pubs/toc/Abstracts/tods/128768.html", abstract = "Nested relations generalize ordinary flat relations by allowing tuple values to be either atomic or set valued. The nested algebra is a generalization of the flat relational algebra to manipulate nested relations. In this paper we study the expressive power of the nested algebra relative to its operation on flat relational databases. 
We show that the flat relational algebra is rich enough to extract the same ``flat information'' from a flat database as the nested algebra does. Theoretically, this result implies that recursive queries such as the transitive closure of a binary relation cannot be expressed in the nested algebra. Practically, this result is relevant to (flat) relational query optimization.", acknowledgement = ack-nhfb, affiliation = "Antwerp Univ., Belgium", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algebraic query transformation; algorithms; languages; nested algebra; nested calculus; nested relations; relational databases; theory, van Gucht relational data model TODS", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Query formulation. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models.", } @Article{Mohan:1992:ATR, author = "C. Mohan and Don Haderle and Bruce Lindsay and Hamid Pirahesh and Peter Schwarz", title = "{ARIES}: a Transaction Recovery Method Supporting Fine-Granularity Locking and Partial Rollbacks Using Write-Ahead Logging", journal = j-TODS, volume = "17", number = "1", pages = "94--162", month = mar, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: IBM Almaden Res. Ctr, Res. R. No. RJ-6649, Jan. 
1989, 45 pp.", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-1/p94-mohan/p94-mohan.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-1/p94-mohan/; http://www.acm.org/pubs/toc/Abstracts/tods/128770.html", abstract = "DB2$^{\rm TM}$, IMS, and Tandem$^{\rm TM}$ systems. ARIES is applicable not only to database management systems but also to persistent object-oriented languages, recoverable file systems and transaction-based operating systems. ARIES has been implemented, to varying degrees, in IBM's OS/2$^{\rm TM}$ Extended Edition Database Manager, DB2, Workstation Data Save Facility/VM, Starburst and QuickSilver, and in the University of Wisconsin's EXODUS and Gamma database machine.", acknowledgement = ack-nhfb, affiliation = "IBM Almaden Res. Center, San Jose, CA, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; buffer management; design; latching; locking; performance; reliability; space management; write-ahead logging", subject = "{\bf H.2.7}: Information Systems, DATABASE MANAGEMENT, Database Administration, Logging and recovery. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf E.5}: Data, FILES, Backup/recovery. {\bf D.4.5}: Software, OPERATING SYSTEMS, Reliability, Backup procedures.", } @Article{Badrinath:1992:SBC, author = "B. R. 
Badrinath and Krithi Ramamritham", title = "Semantics-Based Concurrency Control: Beyond Commutativity", journal = j-TODS, volume = "17", number = "1", pages = "163--199", month = mar, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "93b:68019", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-1/p163-badrinath/p163-badrinath.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-1/p163-badrinath/; http://www.acm.org/pubs/toc/Abstracts/tods/128771.html", abstract = "The concurrency of transactions executing on atomic data types can be enhanced through the use of semantic information about operations defined on these types. Hitherto, commutativity of operations has been exploited to provide enhanced concurrency while avoiding cascading aborts. We have identified a property known as {\em recoverability\/} which can be used to decrease the delay involved in processing noncommuting operations while still avoiding cascading aborts. When an invoked operation is {\em recoverable\/} with respect to an uncommitted operation, the invoked operation can be executed by forcing a commit dependency between the invoked operation and the uncommitted operation; the transaction invoking the operation will not have to wait for the uncommitted operation to abort or commit. Further, this commit dependency only affects the order in which the operations should commit, if both commit; if either operation aborts, the other can still commit thus avoiding cascading aborts. To ensure the serializability of transactions, we force the recoverability relationship between transactions to be acyclic. Simulation studies, based on the model presented by Agrawal et al. 
[1], indicate that using recoverability, the turnaround time of transactions can be reduced. Further, our studies show enhancement in concurrency even when {\em resource constraints\/} are taken into consideration. The magnitude of enhancement is dependent on the resource contention; the lower the resource contention, the higher the improvement.", acknowledgement = ack-nhfb, affiliation = "Rutgers Univ., New Brunswick, NJ, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; concurrency control; performance; semantic information", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. {\bf D.2.1}: Software, SOFTWARE ENGINEERING, Requirements/Specifications.", } @Article{Wang:1992:CTM, author = "Ke Wang and Marc H. 
Graham", title = "Constant-Time Maintainability: a Generalization of Independence", journal = j-TODS, volume = "17", number = "2", pages = "201--246", month = jun, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-2/p201-wang/p201-wang.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-2/p201-wang/; http://www.acm.org/pubs/toc/Abstracts/tods/128904.html", abstract = "The {\em maintenance problem\/} of a database scheme is the following decision problem: Given a consistent database state $ \rho $ and a new tuple $u$ over some relation scheme of $ \rho $, is the modified state $ \rho \cup u $ still consistent? A database scheme is said to be {\em constant-time-maintainable(ctm)\/} if there exists an algorithm that solves its maintenance problem by making a fixed number of tuple retrievals. We present a practically useful algorithm, called the {\em canonical maintenance algorithm}, that solves the maintenance problem of all ctm database schemes within a ``not too large'' bound. A number of interesting properties are shown for ctm database schemes, among them that non-ctm database schemes are not maintainable in less than a linear time in the state size. A test method is given when only cover embedded functional dependencies (fds) appear. When the given dependencies consist of fds and the join dependency (jd) $ \bowtie {\bf R} $ of the database scheme, testing whether a database scheme is ctm is reduced to the case of cover embedded fds. 
When dependency-preserving database schemes with only equality-generating dependencies (egds) are considered, it is shown that every ctm database scheme has a set of dependencies that is equivalent to a set of embedded fds, and thus, our test method for the case of embedded fds can be applied. In particular, this includes the important case of lossless database schemes with only egds.", acknowledgement = ack-nhfb, affiliation = "Chongqing Univ., Sichuan, China", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; chase; constraint enforcement; design; functional dependency; independent database schemes; join dependency; lossless join; relational database; representative instance; tableau; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms. 
{\bf F.4.1}: Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Mathematical Logic, Mechanical theorem proving.", } @Article{Becker:1992:RBO, author = "Ludger Becker and Ralf Hartmut G{\"u}ting", title = "Rule-Based Optimization and Query Processing in an Extensible Geometric Database System", journal = j-TODS, volume = "17", number = "2", pages = "247--303", month = jun, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-2/p247-becker/p247-becker.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-2/p247-becker/; http://www.acm.org/pubs/toc/Abstracts/tods/128905.html", abstract = "Gral is an extensible database system, based on the formal concept of a many-sorted relational algebra. Many-sorted algebra is used to define any application's query language, its query execution language, and its optimization rules. In this paper we describe Gral's optimization component. It provides (1) a sophisticated rule language --- rules are transformations of abstract algebra expressions, (2) a general optimization framework under which more specific optimization algorithms can be implemented, and (3) several control mechanisms for the application of rules. An optimization algorithm can be specified as a series of steps. Each step is defined by its own collection of rules together with a selected control strategy. \par The general facilities are illustrated by the complete design of an example optimizer --- in the form of a rule file --- for a small nonstandard query language and an associated execution language. The query language includes selection, join, ordering, embedding derived values, aggregate functions, and several geometric operations. 
The example shows in particular how the special processing techniques of a geometric database system, such as spatial join methods and geometric index structures, can be integrated into query processing and optimization of a relational database system. A similar, though larger, optimizer is fully functional within the geometric database system implemented as a Gral prototype.", acknowledgement = ack-nhfb, affiliation = "Univ. Gesamthochschule Siegen, Germany", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; extensibility; geometric query processing; languages, Guting Gral TODS; many-sorted algebra; optimization; relational algebra; rule-based optimization", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf F.2.0}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, General. {\bf E.2}: Data, DATA STORAGE REPRESENTATIONS, Hash-table representations. {\bf I.3.5}: Computing Methodologies, COMPUTER GRAPHICS, Computational Geometry and Object Modeling, Geometric algorithms, languages, and systems.", } @Article{Franaszek:1992:CCH, author = "Peter A. Franaszek and John T. 
Robinson and Alexander Thomasian", title = "Concurrency Control for High Contention Environments", journal = j-TODS, volume = "17", number = "2", pages = "304--345", month = jun, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-2/p304-franaszek/p304-franaszek.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-2/p304-franaszek/; http://www.acm.org/pubs/toc/Abstracts/tods/128906.html", abstract = "Future transaction processing systems may have substantially higher levels of concurrency due to reasons which include: (1) increasing disparity between processor speeds and data access latencies, (2) large numbers of processors, and (3) distributed databases. Another influence is the trend towards longer or more complex transactions. A possible consequence is substantially more data contention, which could limit total achievable throughput. In particular, it is known that the usual locking method of concurrency control is not well suited to environments where data contention is a significant factor.\par Here we consider a number of concurrency control concepts and transaction scheduling techniques that are applicable to high contention environments, and that do not rely on database semantics to reduce contention. These include {\em access invariance\/} and its application to prefetching of data, approximations to {\em essential blocking\/} such as {\em wait depth limited\/} scheduling, and {\em phase dependent\/} control. The performance of various concurrency control methods based on these concepts are studied using detailed simulation models. 
The results indicate that the new techniques can offer substantial benefits for systems with high levels of data contention.", acknowledgement = ack-nhfb, affiliation = "Thomas J. Watson Res. Center, Yorktown Heights, NY, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; concurrency control; design; performance; transaction processing", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf D.4.8}: Software, OPERATING SYSTEMS, Performance. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design.", } @Article{Leng:1992:OWA, author = "Chun-Wu Roger Leng and Dik Lun Lee", title = "Optimal Weight Assignment for Signature Generation", journal = j-TODS, volume = "17", number = "2", pages = "346--373", month = jun, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/bibdb.bib; Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-2/p346-leng/p346-leng.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-2/p346-leng/; http://www.acm.org/pubs/toc/Abstracts/tods/128907.html", abstract = "Previous work on superimposed coding has been characterized by two aspects. First, it is generally assumed that signatures are generated from {\em logical\/} text blocks of the same size; that is, each block contains the same number of unique terms after stopword and duplicate removal. 
We call this approach the fixed-size block (FSB) method, since each text block has the same size, as measured by the number of unique terms contained in it. Second, with only a few exceptions [6,7,8,9,17], most previous work has assumed that each term in the text contributes the same number of ones to the signature (i.e., the weight of the term signatures is fixed). The main objective of this paper is to derive an optimal weight assignment that assigns weights to document terms according to their occurrence and query frequencies in order to minimize the false-drop probability. The optimal scheme can account for both uniform and nonuniform occurrence and query frequencies, and the signature generation method is still based on hashing rather than on table lookup. Furthermore, a new way of generating signatures, the fixed-weight block (FWB) method, is introduced. FWB controls the weight of {\em every\/} signature to a constant, whereas in FSB, only the {\em expected\/} signature weight is constant. We have shown that FWB has a lower false-drop probability than that of the FSB method, but its storage overhead is slightly higher. Other advantages of FWB are that the optimal weight assignment can be obtained analytically without making unrealistic assumptions and that the formula for computing the term signature weights is simple and efficient.", acknowledgement = ack-nhfb, affiliation = "Ohio State Univ., Columbus, OH, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "access method; coding methods; design; document retrieval; information retrieval; optimization; performance; signature file; superimposed coding; text retrieval", subject = "{\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Retrieval models. 
{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.3.6}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Library Automation. {\bf I.7.1}: Computing Methodologies, TEXT PROCESSING, Text Editing.", } @Article{Tansel:1992:MRH, author = "Abdullah U. Tansel and Lucy Garnett", title = "On {M. A. Roth, H. F. Korth and A. Silberschatz: ``Extended Algebra and Calculus for Nested Relational Databases'' [ACM Trans. Database Systems {\bf 13} (1988), no. 4, 389--417]}", journal = j-TODS, volume = "17", number = "2", pages = "374--383", month = jun, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "1 167 047", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See \cite{Roth:1988:EAC}.", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-2/p374-tansel/p374-tansel.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-2/p374-tansel/; http://www.acm.org/pubs/toc/Abstracts/tods/128908.html", abstract = "We discuss the issues encountered in the extended algebra and calculus languages for nested relations defined by Roth, Korth, and Silberschatz [4]. Their equivalence proof between algebra and calculus fails because of the keying problems and the use of extended set operations. Extended set operations also have unintended side effects. Furthermore, their calculus seems to allow the generation of power sets, thus making it more powerful than their algebra.", acknowledgement = ack-nhfb, affiliation = "Bilkent Univ., Ankara, Turkey", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "equivalence of algebra and calculus; languages; nested relations; relational algebra; relational calculus; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf F.4.1}: Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Mathematical Logic. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML).", } @Article{Bergamaschi:1992:TRC, author = "Sonia Bergamaschi and Claudio Sartori", title = "On Taxonomic Reasoning in Conceptual Design", journal = j-TODS, volume = "17", number = "3", pages = "385--422", month = sep, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-3/p385-bergamaschi/p385-bergamaschi.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-3/p385-bergamaschi/; http://www.acm.org/pubs/toc/Abstracts/tods/132272.html", abstract = "Taxonomic reasoning is a typical task performed by many AI knowledge representation systems. In this paper, the effectiveness of taxonomic reasoning techniques as an active support to knowledge acquisition and conceptual schema design is shown. The idea developed is that by extending conceptual models with {\em defined concepts\/} and giving them rigorous logic semantics, it is possible to infer {\em isa\/} relationships between concepts on the basis of their descriptions. 
From a theoretical point of view, this approach makes it possible to give a formal definition for {\em consistency\/} and {\em minimality\/} of a conceptual schema. From a pragmatic point of view it is possible to develop an active environment that allows automatic {\em classification\/} of a new concept in the right position of a given taxonomy, ensuring the consistency and minimality of a conceptual schema. A formalism that includes the data semantics of models giving prominence to type constructors (E/R, TAXIS, GALILEO) and algorithms for taxonomic inferences are presented: their soundness, completeness, and tractability properties are proved. Finally, an extended formalism and taxonomic inference algorithms for models giving prominence to attributes (FDM, IFO) are given.", acknowledgement = ack-nhfb, affiliation = "Bologna Univ., Italy", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; languages; schema consistency; schema minimality; semantic models; taxonomic reasoning; theory; verification", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf I.2.4}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods, Representation languages. {\bf I.2.4}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods, Frames and scripts.", } @Article{Markowitz:1992:REE, author = "Victor M. 
Markowitz and Arie Shoshani", title = "Representing Extended Entity-Relationship Structures in Relational Databases: a Modular Approach", journal = j-TODS, volume = "17", number = "3", pages = "423--464", month = sep, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-3/p423-markowitz/p423-markowitz.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-3/p423-markowitz/; http://www.acm.org/pubs/toc/Abstracts/tods/132273.html", abstract = "A common approach to database design is to describe the structures and constraints of the database application in terms of a semantic data model, and then represent the resulting schema using the data model of a commercial database management system. Often, in practice, {\em Extended Entity-Relationship\/} (EER) schemas are translated into equivalent relational schemas. This translation involves different aspects: representing the EER schema using relational constructs, assigning names to relational attributes, normalization, and merging relations. Considering these aspects together, as is usually done in the design methodologies proposed in the literature, is confusing and leads to inaccurate results. We propose to treat separately these aspects and split the translation into four stages (modules) corresponding to the four aspects mentioned above. We define criteria for both evaluating the correctness of and characterizing the relationship between alternative relational representations of EER schemas.", acknowledgement = ack-nhfb, affiliation = "Lawrence Berkeley Lab., CA, USA", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; database design; design; extended entity-relationship model; relational data model; schema translation; semantic data model", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data description languages (DDL).", } @Article{Date:1992:SCG, author = "C. J. Date and Ronald Fagin", title = "Simple Conditions for Guaranteeing Higher Normal Forms in Relational Databases", journal = j-TODS, volume = "17", number = "3", pages = "465--476", month = sep, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-3/p465-date/p465-date.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-3/p465-date/; http://www.acm.org/pubs/toc/Abstracts/tods/132274.html", abstract = "A key is {\em simple\/} if it consists of a single attribute. It is shown that if a relation schema is in third normal form and every key is simple, then it is in projection-join normal form (sometimes called fifth normal form), the ultimate normal form with respect to projections and joins. Furthermore, it is shown that if a relation schema is in Boyce-Codd normal form and {\em some\/} key is simple, then it is in fourth normal form (but not necessarily projection-join normal form). 
These results give the database designer simple sufficient conditions, defined in terms of functional dependencies alone, that guarantee that the schema being designed is automatically in higher normal forms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "5NF; BCNF; Boyce-Codd normal form; database design; design; fifth normal form; fourth normal form (4NF); functional dependency; join dependency; multivalued dependency; normalization; PJ/NF; projection-join normal form; relational database; simple key; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms.", } @Article{Hsu:1992:PEC, author = "Meichun Hsu and Bin Zhang", title = "Performance Evaluation of Cautious Waiting", journal = j-TODS, volume = "17", number = "3", pages = "477--512", month = sep, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-3/p477-hsu/p477-hsu.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-3/p477-hsu/; http://www.acm.org/pubs/toc/Abstracts/tods/132275.html", abstract = "We study a deadlock-free locking-based concurrency control algorithm, called {\em cautious waiting}, which allows for a limited form of waiting. The algorithm is very simple to implement. We present an analytical solution to its performance evaluation based on the mean-value approach proposed by Tay et al. [18]. 
From the modeling point of view, we are able to do away with a major assumption used in Tay's previous work, and therefore capture more accurately both the restart and the blocking rates in the system. We show that to solve for this model we only need to solve for the root of a polynomial. The analytical tools developed enable us to see that the cautious waiting algorithm manages to achieve a {\em delicate\/} balance between restart and blocking, and therefore is superior (i.e., has higher throughput) to {\em both\/} the no-waiting (i.e., immediate restart) and the general waiting algorithms under a wide range of system parameters. The study substantiates the argument that balancing restart and blocking is important in locking systems.", acknowledgement = ack-nhfb, affiliation = "Digital Equipment Corp., Mountain View, CA, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Management; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; cautious waiting; concurrency control; management; performance, Concurrency control locking TODS", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Concurrency.", } @Article{Abbott:1992:SRT, author = "Robert K. 
Abbott and H{\'e}ctor Garc{\'\i}a-Molina", title = "Scheduling Real-Time Transactions: a Performance Evaluation", journal = j-TODS, volume = "17", number = "3", pages = "513--560", month = sep, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-3/p513-abbott/p513-abbott.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-3/p513-abbott/; http://www.acm.org/pubs/toc/Abstracts/tods/132276.html", acknowledgement = ack-nhfb, affiliation = "Digital Equipment Corp., Littleton, MA, USA", annote = "real-time", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; deadlines; locking protocols; performance; real-time systems", subject = "{\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Scheduling. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Concurrency. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. 
{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Siegel:1992:MAR, author = "Michael Siegel and Edward Sciore and Sharon Salveter", title = "A Method for Automatic Rule Derivation to Support Semantic Query Optimization", journal = j-TODS, volume = "17", number = "4", pages = "563--600", month = dec, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P20 (68T05)", MRnumber = "1 197 198", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-4/p563-siegel/p563-siegel.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-4/p563-siegel/; http://www.acm.org/pubs/toc/Abstracts/tods/146932.html", abstract = "The use of inference rules to support intelligent data processing is an increasingly important tool in many areas of computer science. In database systems, rules are used in semantic query optimization as a method for reducing query processing costs. The savings is dependent on the ability of experts to supply a set of useful rules and the ability of the optimizer to quickly find the appropriate transformations generated by these rules. Unfortunately, the most useful rules are not always those that would or could be specified by an expert. This paper describes the architecture of a system having two interrelated components: a combined conventional/semantic query optimizer, and an automatic rule deriver.\par Our automatic rule derivation method uses intermediate results from the optimization process to direct the search for learning new rules. 
Unlike a system employing only user-specified rules, a system with an automatic capability can derive rules that may be true only in the current state of the database and can modify the rule set to reflect changes in the database and its usage pattern.\par This system has been implemented as an extension of the EXODUS conventional query optimizer generator. We describe the implementation, and show how semantic query optimization is an extension of conventional optimization in this context.", acknowledgement = ack-nhfb, affiliation = "Boston Univ., MA, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "integrity constraint; languages; learning; performance; transformation heuristic", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf I.2.6}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Learning, Knowledge acquisition.", } @Article{Kamel:1992:IDC, author = "Nabil Kamel and Roger King", title = "Intelligent Database Caching Through the Use of Page Answers and Page Traces", journal = j-TODS, volume = "17", number = "4", pages = "601--646", month = dec, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P20", MRnumber = "1 197 199", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-4/p601-kamel/p601-kamel.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-4/p601-kamel/; http://www.acm.org/pubs/toc/Abstracts/tods/146933.html", abstract = "In this paper a new method to improve the utilization of main memory systems is presented. 
The new method is based on prestoring in main memory a number of query answers, each evaluated out of a single memory page. To this end, the ideas of page-answers and page-traces are formally described and their properties analyzed. The query model used here allows for selection, projection, join, recursive queries as well as arbitrary combinations. We also show how to apply the approach under update traffic. This concept is especially useful in managing the main memories of an important class of applications. This class includes the evaluation of triggers and alerters, performance improvement of rule-based systems, integrity constraint checking, and materialized views. These applications are characterized by the existence at compile time of a predetermined set of queries, by a slow but persistent update traffic, and by their need to repetitively reevaluate the query set. The new approach represents a new type of intelligent database caching, which contrasts with traditional caching primarily in that the cache elements are derived data and as a consequence, they overlap arbitrarily and do not have a fixed length. The contents of the main memory cache are selected based on the data distribution within the database, the set of fixed queries to preprocess, and the paging characteristics. Page-answers and page-traces are used as the smallest indivisible units in the cache. An efficient heuristic to select a near optimal set of page-answers and page-traces to populate the main memory has been developed, implemented, and tested. Finally, quantitative measurements of performance benefits are reported.", acknowledgement = ack-nhfb, affiliation = "Florida Univ., Gainesville, FL, USA", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; artificial intelligence; databases; design; page access; performance", subject = "{\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.3.1}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Content Analysis and Indexing, Indexing methods. {\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, Record classification. {\bf I.1.3}: Computing Methodologies, ALGEBRAIC MANIPULATION, Languages and Systems, Evaluation strategies. {\bf I.2.8}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Problem Solving, Control Methods, and Search, Plan execution, formation, generation.", } @Article{Maiocchi:1992:ADT, author = "Roberto Maiocchi and Barbara Pernici and Federico Barbic", title = "Automatic Deduction of Temporal Information", journal = j-TODS, volume = "17", number = "4", pages = "647--688", month = dec, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P20 (03B70 68T27)", MRnumber = "93h:68038", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-4/p647-maiocchi/p647-maiocchi.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-4/p647-maiocchi/; http://www.acm.org/pubs/toc/Abstracts/tods/146934.html", abstract = "In many computer-based applications, temporal information has to be stored, retrieved, and related to other temporal information. 
Several time models have been proposed to manage temporal knowledge in the fields of conceptual modeling, database systems, and artificial intelligence.\par In this paper we present TSOS, a system for reasoning about time that can be integrated as a time expert in environments designed for broader problem-solving domains. The main intended goal of TSOS is to allow a user to infer further information on the temporal data stored in the database through a set of deduction rules handling various aspects of time. For this purpose, TSOS provides the capability of answering queries about the temporal specifications it has in its temporal database.\par Distinctive time-modeling features of TSOS are the introduction of {\em temporal modalities}, i.e., the possibility of specifying if a piece of information is always true within a time interval, or if it is only sometimes true, and the capability of answering about the possibility and the necessity of the validity of some information at a given time, the association of temporal knowledge both to {\em instances of data\/} and to {\em types of data}, and the development of a {\em time calculus\/} for reasoning on temporal data. Another relevant feature of TSOS is the capability to reason about temporal data specified at different time granularities.", acknowledgement = ack-nhfb, affiliation = "Politecnico di Milano, Italy", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; events; languages; meta-level temporal assertions; propositions; temporal database; temporal modalities; theory; time calculus", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. 
{\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Deduction.", } @Article{Agrawal:1992:GTQ, author = "D. Agrawal and A. {El Abbadi}", title = "The Generalized Tree Quorum Protocol: An Efficient Approach for Managing Replicated Data", journal = j-TODS, volume = "17", number = "4", pages = "689--717", month = dec, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68M10", MRnumber = "1 197 201", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-4/p689-agrawal/p689-agrawal.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-4/p689-agrawal/; http://www.acm.org/pubs/toc/Abstracts/tods/146935.html", abstract = "In this paper, we present a low-cost fault-tolerant protocol for managing replicated data. We impose a logical tree structure on the set of copies of an object and develop a protocol that uses the information available in the logical structure to reduce the communication requirements for read and write operations. The tree quorum protocol is a generalization of the static voting protocol with two degrees of freedom for choosing quorums. In general, this results in significantly lower communication costs for comparable data availability. The protocol exhibits the property of graceful degradation, i.e., communication costs for executing operations are minimal in a failure-free environment but may increase as failures occur. This approach in designing distributed systems is desirable since it provides fault-tolerance without imposing unnecessary costs on the failure-free mode of operations.", acknowledgement = ack-nhfb, affiliation = "California Univ., Santa Barbara, CA, USA", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Economics; Experimentation; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; economics; experimentation; measurement; performance", subject = "{\bf C.2.2}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design. {\bf G.2.2}: Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Network problems. {\bf G.2.2}: Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Trees.", } @Article{Atzeni:1992:URD, author = "Paolo Atzeni and Riccardo Torlone", title = "Updating Relational Databases Through Weak Instance Interfaces", journal = j-TODS, volume = "17", number = "4", pages = "718--745", month = dec, year = "1992", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P20", MRnumber = "93h:68035", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1992-17-4/p718-atzeni/p718-atzeni.pdf; http://www.acm.org/pubs/citations/journals/tods/1992-17-4/p718-atzeni/; http://www.acm.org/pubs/toc/Abstracts/tods/146936.html", abstract = "The problem of updating databases through interfaces based on the weak instance model is studied, thus extending previous proposals that considered them only from the query point of view. 
Insertions and deletions of tuples are considered.\par As a preliminary tool, a lattice on states is defined, based on the information content of the various states.\par Potential results of an insertion are states that contain at least the information in the original state and that in the new tuple. Sometimes there is no potential result, and in the other cases there may be many of them. We argue that the insertion is deterministic if the state that contains the information common to all the potential results (the greatest lower bound, in the lattice framework) is a potential result itself. Effective characterizations for the various cases exist.\par A symmetric approach is followed for deletions, with fewer cases, since there are always potential results; determinism is characterized as a consequence.", acknowledgement = ack-nhfb, affiliation = "Roma Univ., Italy", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.m}: Information Systems, DATABASE MANAGEMENT, Miscellaneous. 
{\bf H.5.2}: Information Systems, INFORMATION INTERFACES AND PRESENTATION, User Interfaces, Theory and methods.", } @Article{Ishikawa:1993:MLI, author = "Hiroshi Ishikawa and Fumio Suzuki and Fumihiko Kozakura and Akifumi Makinouchi and Mika Miyagishima and Yoshio Izumida and Masaaki Aoshima and Yasuo Yamane", title = "The Model, Language, and Implementation of an Object-Oriented Multimedia Knowledge Base Management System", journal = j-TODS, volume = "18", number = "1", pages = "1--50", month = mar, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-1/p1-ishikawa/p1-ishikawa.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-1/p1-ishikawa/; http://www.acm.org/pubs/toc/Abstracts/tods/151285.html", abstract = "New applications such as CAD, AI, and hypermedia require direct representation and flexible use of complex objects, behavioral knowledge, and multimedia data. To this end, we have devised a knowledge base management system called Jasmine. An object-oriented approach in a programming language also seems promising for use in Jasmine. Jasmine extends the current object-oriented approach and provides the following features. Our object model is based on functional data models and well-established set theory. Attributes or functions composing objects can represent both structural and behavioral knowledge. The object model can represent incomplete and generic knowledge. The model can support the basic storage and operations of multimedia data. The facets of attributes can flexibly represent constraints and triggers. The object manipulation language can support associative access of objects. 
The structural and behavioral knowledge can be uniformly treated to allow the user to specify complex object operations in a compact manner. The user-defined and system-defined attributes can be uniformly specified to ease user customization of the language. The classes and instances can be uniformly accessed. Incomplete knowledge can be flexibly accessed. The system has a layered architecture. Objects are stored in nested relations provided by extensive DBMS as a sublayer. User query of objects is compiled into relational operations such as select and join, which can be efficiently processed using hashing. The behavioral knowledge is compiled into predicate and manipulation function interfaces that can directly access tuples in a buffer.", acknowledgement = ack-nhfb, affiliation = "Fujitsu Labs., Ltd., Kawasaki, Japan", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; languages", subject = "{\bf I.2.1}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Applications and Expert Systems. {\bf I.2.4}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods, Representations (procedural and rule-based). {\bf H.4.2}: Information Systems, INFORMATION SYSTEMS APPLICATIONS, Types of Systems. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. 
{\bf H.5.1}: Information Systems, INFORMATION INTERFACES AND PRESENTATION, Multimedia Information Systems.", } @Article{Johnson:1993:PCB, author = "Theodore Johnson and Dennis Shasha", title = "The Performance of Current {B-Tree} Algorithms", journal = j-TODS, volume = "18", number = "1", pages = "51--101", month = mar, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "``Current'' in the title should be ``Concurrent''.", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-1/p51-johnson/p51-johnson.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-1/p51-johnson/; http://www.acm.org/pubs/toc/Abstracts/tods/151286.html", acknowledgement = ack-nhfb, affiliation = "Florida Univ., Gainesville, FL, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Experimentation; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "B-trees; concurrent B-trees; concurrent data structures; experimentation; measurement; performance; performance of concurrent algorithms; TOC Concurrency control simulations TODS, algorithms", subject = "{\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Retrieval models. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Search process. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design. {\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems, Sorting and searching. 
{\bf I.6.6}: Computing Methodologies, SIMULATION AND MODELING, Simulation Output Analysis.", } @Article{Kumar:1993:CAT, author = "Akhil Kumar and Arie Segev", title = "Cost and Availability Tradeoffs in Replicated Data Concurrency Control", journal = j-TODS, volume = "18", number = "1", pages = "102--131", month = mar, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-1/p102-kumar/p102-kumar.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-1/p102-kumar/; http://www.acm.org/pubs/toc/Abstracts/tods/151287.html", acknowledgement = ack-nhfb, affiliation = "Cornell Univ., Ithaca, NY, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; availability; performance; replicated database", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases.", } @Article{Abdel-Ghaffar:1993:ODA, author = "Khaled A. S. 
Abdel-Ghaffar and Amr {El Abbadi}", title = "Optimal Disk Allocation for Partial Match Queries", journal = j-TODS, volume = "18", number = "1", pages = "132--156", month = mar, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-1/p132-abdel-ghaffar/p132-abdel-ghaffar.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-1/p132-abdel-ghaffar/; http://www.acm.org/pubs/toc/Abstracts/tods/151288.html", abstract = "The problem of disk allocation addresses the issue of how to distribute a file on several disks in order to maximize concurrent disk accesses in response to a partial match query. In this paper a coding-theoretic analysis of this problem is presented, and both necessary and sufficient conditions for the existence of strictly optimal allocation methods are provided. Based on a class of optimal codes, known as maximum distance separable codes, strictly optimal allocation methods are constructed. Using the necessary conditions proved, we argue that the standard definition of strict optimality is too strong and cannot be attained, in general. Hence, we reconsider the definition of optimality. Instead of basing it on an abstract definition that may not be attainable, we propose a new definition based on the best possible allocation method. Using coding theory, allocation methods that are optimal according to our proposed criterion are developed.", acknowledgement = ack-nhfb, affiliation = "California Univ., Davis, CA, USA", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Abbadi TODS, algorithms; Cartesian product files; coding theory; design; multiple disk systems; partial match queries; theory", subject = "{\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization. {\bf D.4.3}: Software, OPERATING SYSTEMS, File Systems Management, File organization. {\bf E.5}: Data, FILES, Organization/structure. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.1.1}: Information Systems, MODELS AND PRINCIPLES, Systems and Information Theory, Information theory. {\bf E.4}: Data, CODING AND INFORMATION THEORY.", } @Article{Matsliach:1993:PAF, author = "Gabriel Matsliach", title = "Performance Analysis of File Organizations that Use Multibucket Data Leaves with Partial Expansions", journal = j-TODS, volume = "18", number = "1", pages = "157--180", month = mar, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-1/p157-matsliach/p157-matsliach.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-1/p157-matsliach/; http://www.acm.org/pubs/toc/Abstracts/tods/151289.html", abstract = "We present an exact performance analysis, under random insertions, of file organizations that use multibucket data leaves and perform partial expansions before splitting. We evaluate the expected disk space utilization of the file and show how the expected search and insert costs can be estimated. The analytical results are confirmed by simulations. 
The analysis can be used to investigate both the dynamic and the asymptotic behaviors.", acknowledgement = ack-nhfb, affiliation = "Technion-Israel Inst. of Technol., Haifa, Israel", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Performance; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "bounded disorder files; multibucket data leaves; partial expansion; performance; performance analysis; search structures; theory; verification", subject = "{\bf E.5}: Data, FILES, Organization/structure. {\bf E.1}: Data, DATA STRUCTURES, Trees. {\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization. {\bf H.3.1}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Content Analysis and Indexing. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval.", } @Article{Chomicki:1993:FRI, author = "Jan Chomicki and Tomasz Imieli{\'n}ski", title = "Finite Representation of Infinite Query Answers", journal = j-TODS, volume = "18", number = "2", pages = "181--223", month = jun, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-2/p181-chomicki/p181-chomicki.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-2/p181-chomicki/; http://www.acm.org/pubs/toc/Abstracts/tods/151635.html", abstract = "We define here a formal notion of finite representation of infinite query answers in logic programs. 
We apply this notion to Datalog$_{\rm nS}$ programs may be infinite and consequently queries may have infinite answers.\par We present a method to finitely represent infinite least Herbrand models of Datalog$_{\rm nS}$ program (and its underlying computational engine) can be forgotten. Given a query to be evaluated, it is easy to obtain from the relational specification finitely many answer substitutions that represent infinitely many answer substitutions to the query. The method involved is a combination of a simple, unificationless, computational mechanism (graph traversal, congruence closure, or term rewriting) and standard relational query evaluation methods. Second, a relational specification is {\em effectively computable\/} and its computation is no harder, in the sense of the complexity class, than answering yes-no queries.\par Our method is applicable to every range-restricted Datalog$_{\rm nS}$ program. We also show that for some very simple non-Datalog$_{\rm nS}$ logic programs, finite representations of query answers do not exist.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. and Inf. Sci., Kansas State Univ., Manhattan, KS, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; theory", subject = "{\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Datalog. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Deduction. 
{\bf F.4.1}: Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Mathematical Logic.", } @Article{Hou:1993:PTC, author = "Wen-Chi Hou and Gultekin {\"O}zsoyo{\u{g}}lu", title = "Processing Time-Constrained Aggregate Queries in {CASE-DB}", journal = j-TODS, volume = "18", number = "2", pages = "224--261", month = jun, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-2/p224-hou/p224-hou.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-2/p224-hou/; http://www.acm.org/pubs/toc/Abstracts/tods/151636.html", abstract = "In this paper, we present an algorithm to strictly control the time to process an estimator for an aggregate relational query. The algorithm implemented in a prototype database management system, called CASE-DB, iteratively samples from input relations, and evaluates the associated estimator until the time quota expires.\par In order to estimate the time cost of a query, CASE-DB uses adaptive time cost formulas. The formulas are adaptive in that the parameters of the formulas can be adjusted at runtime to better fit the characteristics of a query. To control the use of time quota, CASE-DB adopts the one-at-a-time-interval time control strategy to make a tradeoff between the risks of overspending and the overhead. Finally, experimental evaluation of the methodology is presented.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Southern Illinois Univ., Carbondale, IL, USA", ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; estimation; performance; relational algebra; risk of overspending; sampling; selectivity; theory; time constraints", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf G.3}: Mathematics of Computing, PROBABILITY AND STATISTICS, Statistical computing. {\bf H.2.8}: Information Systems, DATABASE MANAGEMENT, Database applications. {\bf J.7}: Computer Applications, COMPUTERS IN OTHER SYSTEMS, Real time.", } @Article{Drenick:1993:SQO, author = "P. E. Drenick and E. J. Smith", title = "Stochastic Query Optimization in Distributed Databases", journal = j-TODS, volume = "18", number = "2", pages = "262--288", month = jun, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-2/p262-drenick/p262-drenick.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-2/p262-drenick/; http://www.acm.org/pubs/toc/Abstracts/tods/151637.html", abstract = "Many algorithms have been devised for minimizing the costs associated with obtaining the answer to a single, isolated query in a distributed database system. However, if more than one query may be processed by the system at the same time and if the arrival times of the queries are unknown, the determination of optimal query-processing strategies becomes a stochastic optimization problem. In order to cope with such problems, a theoretical state-transition model is presented that treats the system as one operating under a stochastic load. 
Query-processing strategies may then be distributed over the processors of a network as probability distributions, in a manner which accommodates many queries over time.\par It is then shown that the model leads to the determination of optimal query-processing strategies as the solution of mathematical programming problems, and analytical results for several examples are presented. Furthermore, a divide-and-conquer approach is introduced for decomposing stochastic query optimization problems into distinct subproblems for processing queries sequentially and in parallel.", acknowledgement = ack-nhfb, affiliation = "Polytech. Univ., Farmingdale, NY, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; distributed query processing; performance; state-transition model; stochastic query optimization; theory", subject = "{\bf G.1.6}: Mathematics of Computing, NUMERICAL ANALYSIS, Optimization, Linear programming. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Retrieval models. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. 
{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases.", } @Article{Rothermel:1993:OCP, author = "Kurt Rothermel and Stefan Pappe", title = "Open Commit Protocols Tolerating Commission Failures", journal = j-TODS, volume = "18", number = "2", pages = "289--332", month = jun, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-2/p289-rothermel/p289-rothermel.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-2/p289-rothermel/; http://www.acm.org/pubs/toc/Abstracts/tods/151638.html", abstract = "To ensure atomicity of transactions in distributed systems so-called 2-phase commit (2PC) protocols have been proposed. The basic assumption of these protocols is that the processing nodes involved in transactions are ``sane,'' i.e., they only fail with omission failures, and nodes eventually recover from failures. Unfortunately, this assumption is not realistic for so-called Open Distributed Systems (ODSs), in which nodes may have totally different reliability characteristics. In ODSs, nodes can be classified into trusted nodes (e.g., a banking server) and nontrusted nodes (e.g., a home PC requesting a remote banking service). While trusted nodes are assumed to be sane, nontrusted nodes may fail permanently and even cause commission failures to occur.\par In this paper, we propose a family of 2PC protocols that tolerate any number of omission failures at trusted nodes and any number of commission and omission failures at nontrusted nodes. The proposed protocols ensure that (at least) the trusted nodes participating in a transaction {\em eventually\/} terminate the transaction in a {\em consistent\/} manner. 
Unlike Byzantine commit protocols, our protocols do {\em not\/} incorporate mechanisms for achieving Byzantine agreement, which has advantages in terms of complexity: Our protocols have the same or only a slightly higher message complexity than traditional 2PC protocols.", acknowledgement = ack-nhfb, affiliation = "Inst. of Parallel and Distributed High Performance Syst., Stuttgart Univ., Germany", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; commit protocols; open systems; performance; reliability", subject = "{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Network operating systems. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. 
{\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability.", } @Article{Rahm:1993:EPE, author = "Erhard Rahm", title = "Empirical Performance Evaluation of Concurrency and Coherency Control Protocols for Database Sharing Systems", journal = j-TODS, volume = "18", number = "2", pages = "333--377", month = jun, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; Database/Wiederhold.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-2/p333-rahm/p333-rahm.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-2/p333-rahm/; http://www.acm.org/pubs/toc/Abstracts/tods/151639.html", abstract = "Database Sharing (DB-sharing) refers to a general approach for building a distributed high performance transaction system. The nodes of a DB-sharing system are locally coupled via a high-speed interconnect and share a common database at the disk level. This is also known as a ``shared disk'' approach. We compare database sharing with the database partitioning (shared nothing) approach and discuss the functional DBMS components that require new and coordinated solutions for DB-sharing. The performance of DB-sharing systems critically depends on the protocols used for concurrency and coherency control. The frequency of communication required for these functions has to be kept as low as possible in order to achieve high transaction rates and short response times. A trace-driven simulation system for DB-sharing complexes has been developed that allows a realistic performance comparison of four different concurrency and coherency control protocols. We consider two locking and two optimistic schemes which operate either under central or distributed control. 
For coherency control, we investigate so-called on-request and broadcast invalidation schemes, and employ buffer-to-buffer communication to exchange modified pages directly between different nodes. The performance impact of random routing versus affinity-based load distribution and different communication costs is also examined. In addition, we analyze potential performance bottlenecks created by hot spot pages.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Kaiserslautern Univ., Germany", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; coherency control; concurrency control; database partitioning; database sharing; design; performance; performance analysis; shared disk; shared nothing; theory; trace-driven simulation", subject = "{\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf C.4}: Computer Systems Organization, PERFORMANCE OF SYSTEMS. {\bf D.4.8}: Software, OPERATING SYSTEMS, Performance, Simulation. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Concurrency.", } @Article{Kuper:1993:LDM, author = "Gabriel M. Kuper and Moshe Y. 
Vardi", title = "The Logical Data Model", journal = j-TODS, volume = "18", number = "3", pages = "379--413", month = sep, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "96c:68035", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-3/p379-kuper/p379-kuper.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-3/p379-kuper/; http://www.acm.org/pubs/toc/Abstracts/tods/155274.html", abstract = "We propose an object-oriented data model that generalizes the relational, hierarchical, and network models. A database scheme in this model is a directed graph, whose leaves represent data and whose internal nodes represent connections among the data. Instances are constructed from objects, which have separate names and values. We define a logic for the model, and describe a nonprocedural query language that is based on the logic. We also describe an algebraic query language and show that it is equivalent to the logical language.", acknowledgement = ack-nhfb, affiliation = "IBM Thomas J. Watson Res. Center, Yorktown Heights, NY, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Algebra; database schema; design; languages; logic; relational database; theory; tuple calculus", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. 
{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema.", } @Article{Levene:1993:SNE, author = "Mark Levene and George Loizou", title = "Semantics for Null Extended Nested Relations", journal = j-TODS, volume = "18", number = "3", pages = "414--459", month = sep, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-3/p414-levene/p414-levene.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-3/p414-levene/; http://www.acm.org/pubs/toc/Abstracts/tods/155275.html", abstract = "The nested relational model extends the flat relational model by relaxing the first normal form assumption in order to allow the modeling of complex objects. Much of the previous work on the nested relational model has concentrated on defining the data structures and query language for the model. The work done on integrity constraints in nested relations has mainly focused on characterizing subclasses of nested relations and defining normal forms for nested relations with certain desirable properties.\par In this paper we define the semantics of nested relations, which may contain null values, in terms of integrity constraints, called {\em null extended data dependencies}, which extend functional dependencies and join dependencies encountered in flat relational database theory. We formalize incomplete information in nested relations by allowing only one unmarked {\em generic null value}, whose semantics we do not further specify. The motivation for the choice of a generic null is our desire to investigate only fundamental semantics which are common to all unmarked null types. 
This leads us to define a preorder on nested relations, which allows us to measure the relative information content of nested relations. We also define a procedure, called the {\em extended chase procedure}, for testing satisfaction of null extended data dependencies and for making inferences by using these null extended data dependencies. The extended chase procedure is shown to generalize the classical chase procedure, which is of major importance in flat relational database theory. As a consequence of our approach we are able to capture the novel notion of losslessness in nested relations, called herein {\em null extended lossless decomposition}. Finally, we show that the semantics of nested relations are a natural extension of the semantics of flat relations.", acknowledgement = ack-nhfb, affiliation = "Univ. Coll., London, UK", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; extended chase; languages; nested relations; null extended algebra; null extended data dependencies; nulls; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms.", } @Article{Agrawal:1993:COS, author = "Divyakant Agrawal and Amr {El Abbadi} and Ambuj K.
Singh", title = "Consistency and Orderability: Semantics-Based Correctness Criteria for Databases", journal = j-TODS, volume = "18", number = "3", pages = "460--486", month = sep, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-3/p460-agrawal/p460-agrawal.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-3/p460-agrawal/; http://www.acm.org/pubs/toc/Abstracts/tods/155276.html", abstract = "The semantics of objects and transactions in database systems are investigated. User-defined predicates called {\em consistency assertions\/} are used to specify user programs. Three new correctness criteria are proposed. The first correctness criterion {\em consistency\/} is based solely on the users' specifications and admits nonserializable executions that are acceptable to the users. Integrity constraints of the database are maintained through consistency assertions. The second correctness criterion {\em orderability\/} is a generalization of view serializability and represents a weak notion of equivalence to a serial schedule. Finally, the third correctness criterion {\em strong orderability\/} is introduced as a generalization of conflict serializability. Unlike consistency, the notions of orderability allow users to operate in isolation as maintenance of the integrity constraint now becomes the responsibility of the database system.", acknowledgement = ack-nhfb, affiliation = "California Univ., Santa Barbara, CA, USA", ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Abbadi TODS, theory; concurrency control; object-oriented databases; semantics; serializability theory; verification", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf D.2.4}: Software, SOFTWARE ENGINEERING, Program Verification, Correctness proofs. {\bf F.3.1}: Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs, Assertions.", } @Article{Sagiv:1993:SQT, author = "Yehoshua Sagiv and Oded Shmueli", title = "Solving Queries by Tree Projections", journal = j-TODS, volume = "18", number = "3", pages = "487--511", month = sep, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15", MRnumber = "96c:68038", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-3/p487-sagiv/p487-sagiv.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-3/p487-sagiv/; http://www.acm.org/pubs/toc/Abstracts/tods/155277.html", abstract = "Suppose a database schema {\bf D} is extended to {\bf D'} by adding new relation schemas, and states for {\bf D} are extended to states for {\bf D'} by applying joins and projections to existing relations. It is shown that the existence of a tree projection of {\bf D'} with respect to {\bf D} is equivalent to certain desirable properties that {\bf D'} has with respect to {\bf D}. These properties amount to the ability to compute efficiently the join of all relations in a state for {\bf D} from an extension of this state over {\bf D'}.
The equivalence is proved for unrestricted (i.e., both finite and infinite) databases. If {\bf D'} is obtained from {\bf D} by adding a set of new relation schemas that form a tree schema, then the equivalence also holds for finite databases. In this case there is also a polynomial time algorithm for testing the existence of a tree projection of {\bf D'} with respect to {\bf D}.", acknowledgement = ack-nhfb, affiliation = "Hebrew Univ., Jerusalem, Israel", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "acyclicity; algorithms; chase; database schema; design; hypergraph; inclusion dependency; join; monotone join expression; projection; qual graph; relational database; semijoin; semijoin reduction; tableau; theory; tree projection; tree schema", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Ioannidis:1993:TCA, author = "Yannis Ioannidis and Raghu Ramakrishnan and Linda Winger", title = "Transitive Closure Algorithms Based on Graph Traversal", journal = j-TODS, volume = "18", number = "3", pages = "512--576", month = sep, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-3/p512-ioannidis/p512-ioannidis.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-3/p512-ioannidis/; http://www.acm.org/pubs/toc/Abstracts/tods/155273.html", abstract = "Several graph-based algorithms have been proposed in the literature to compute the transitive closure of a directed graph. 
We develop two new algorithms (Basic\_TC and Global\_DFTC) and compare the performance of their implementations in a disk-based environment with a well-known graph-based algorithm proposed by Schmitz. Our algorithms use depth-first search to traverse a graph and a technique called {\em marking\/} to avoid processing some of the arcs in the graph. They compute the closure by processing nodes in reverse topological order, building descendent sets by adding the descendent sets of children. While the details of these algorithms differ considerably, one important difference among them is the time at which descendent set additions are performed. Basic\_TC results in superior performance. The first reason is that early additions result in larger descendent set sizes on the average over the duration of the execution, thereby causing more I/O; very often this turns out to more than offset the gains of not having to fetch certain sets again to add them. The second reason is that information collected in the first pass can be used to apply several optimizations in the second pass. To the extent possible, we also adapt these algorithms to perform path computations. Again, our performance comparison confirms the trends seen in reachability queries. Taken in conjunction with another performance study our results indicate that all graph-based algorithms significantly outperform other types of algorithms such as Seminaive and Warren.", acknowledgement = ack-nhfb, affiliation = "Wisconsin Univ., Madison, WI, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; depth-first search; node reachability; path computations; performance; transitive closure", subject = "{\bf D.3.4}: Software, PROGRAMMING LANGUAGES, Processors.
{\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems, Computations on discrete structures. {\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Recursion. {\bf D.4.2}: Software, OPERATING SYSTEMS, Storage Management, Main memory. {\bf D.4.2}: Software, OPERATING SYSTEMS, Storage Management, Secondary storage. {\bf D.4.2}: Software, OPERATING SYSTEMS, Storage Management, Swapping. {\bf E.1}: Data, DATA STRUCTURES, Graphs. {\bf E.1}: Data, DATA STRUCTURES, Trees. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Thomasian:1993:TPL, author = "Alexander Thomasian", title = "Two-Phase Locking Performance and Its Thrashing Behavior", journal = j-TODS, volume = "18", number = "4", pages = "579--625", month = dec, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-4/p579-thomasian/p579-thomasian.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-4/p579-thomasian/; http://www.acm.org/pubs/toc/Abstracts/tods/169720.html", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; data contention; load control; performance; theory; thrashing; two-phase locking", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Deadlock avoidance. {\bf D.4.8}: Software, OPERATING SYSTEMS, Performance, Modeling and prediction. {\bf D.4.8}: Software, OPERATING SYSTEMS, Performance, Operational analysis. 
{\bf D.4.8}: Software, OPERATING SYSTEMS, Performance, Simulation. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Qian:1993:DSD, author = "Xiaolei Qian", title = "The Deductive Synthesis of Database Transactions", journal = j-TODS, volume = "18", number = "4", pages = "626--677", month = dec, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-4/p626-qian/p626-qian.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-4/p626-qian/; http://www.acm.org/pubs/toc/Abstracts/tods/169716.html", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database programming; deductive tableau; design; integrity constraints; search control; theory; transaction logic; transaction synthesis; verification", subject = "{\bf I.2.2}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Automatic Programming, Program synthesis. {\bf D.1.2}: Software, PROGRAMMING TECHNIQUES, Automatic Programming. {\bf F.3.1}: Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs, Logics of programs. {\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Database (persistent) programming languages. 
{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Malvestuto:1993:USA, author = "Francesco M. Malvestuto", title = "A Universal-Scheme Approach to Statistical Databases Containing Homogeneous Summary Tables", journal = j-TODS, volume = "18", number = "4", pages = "678--708", month = dec, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-4/p678-malvestuto/p678-malvestuto.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-4/p678-malvestuto/; http://www.acm.org/pubs/toc/Abstracts/tods/169712.html", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "bipartite graph; category relation; design; query-answering system; statistical database; summary table; theory; universal classification scheme", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf G.1.3}: Mathematics of Computing, NUMERICAL ANALYSIS, Numerical Linear Algebra, Linear systems (direct and iterative methods). {\bf G.1.6}: Mathematics of Computing, NUMERICAL ANALYSIS, Optimization, Integer programming. {\bf G.1.6}: Mathematics of Computing, NUMERICAL ANALYSIS, Optimization, Linear programming. {\bf G.2.2}: Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Graph algorithms. {\bf G.2.2}: Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Network problems. {\bf G.2.2}: Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Trees. 
{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages.", } @Article{Ioannidis:1993:OHL, author = "Yannis E. Ioannidis and Stavros Christodoulakis", title = "Optimal Histograms for Limiting Worst-Case Error Propagation in the Size of Join Results", journal = j-TODS, volume = "18", number = "4", pages = "709--748", month = dec, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1993-18-4/p709-ioannidis/p709-ioannidis.pdf; http://www.acm.org/pubs/citations/journals/tods/1993-18-4/p709-ioannidis/; http://www.acm.org/pubs/toc/Abstracts/tods/169708.html", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "histograms; join size estimation; performance; query optimization; theory; vector majorization", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf G.1.0}: Mathematics of Computing, NUMERICAL ANALYSIS, General, Error analysis. {\bf H.1.1}: Information Systems, MODELS AND PRINCIPLES, Systems and Information Theory.", } @Article{Anonymous:1993:AI, author = "Anonymous", title = "1993 Author Index", journal = j-TODS, volume = "18", number = "4", pages = "749--750", month = dec, year = "1993", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 10 12:59:37 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kim:1994:CS, author = "Won Kim", title = "Charter and Scope", journal = j-TODS, volume = "19", number = "1", pages = "1--??", month = mar, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Dec 6 18:01:56 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Olivier:1994:TSO, author = "Martin S. Olivier and Sebastiaan H. {von Solms}", title = "A Taxonomy for Secure Object-Oriented Databases", journal = j-TODS, volume = "19", number = "1", pages = "3--46", month = mar, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-1/p3-olivier/p3-olivier.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-1/p3-olivier/; http://www.acm.org/pubs/toc/Abstracts/tods/174640.html", abstract = "This paper proposes a taxonomy for secure object-oriented databases in order to clarify the issues in modeling and implementing such databases. It also indicates some implications of the various choices one may make when designing such a database.\par Most secure database models have been designed for relational databases. The object-oriented database model is more complex than the relational model. For these reasons, models for secure object-oriented databases are more complex than their relational counterparts. 
Furthermore, since views of the object-oriented model differ, each security model has to make some assumptions about the object-oriented model used for its particular database.\par A number of models for secure object-oriented databases have been proposed. These models differ in many respects, because they focus on different aspects of the security problem, or because they make different assumptions about what constitutes a secure database or because they make different assumptions about the object-oriented model.\par The taxonomy proposed in this paper may be used to compare the various models: Models that focus on specific issues may be positioned in the broader context with the aid of the taxonomy. The taxonomy also identifies the major aspects where security models may differ and indicates some alternatives available to the system designer for each such design choice. We show some implications of using specific alternatives.\par Since differences between models for secure object-oriented databases are often subtle, a formal notation is necessary for a proper comparison. Such a formal notation also facilitates the formal derivation of restrictions that apply under specific conditions. The formal approach also gives a clear indication about the assumptions made by us---given as axioms---and the consequences of those assumptions (and of design choices made by the model designer)---given as theorems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Security", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; formal security models; information security; multilevel secure databases; object-orientation; security", subject = "{\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf D.4.6}: Software, OPERATING SYSTEMS, Security and Protection. 
{\bf K.6.5}: Computing Milieux, MANAGEMENT OF COMPUTING AND INFORMATION SYSTEMS, Security and Protection. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design.", } @Article{Tendick:1994:MRP, author = "Patrick Tendick and Norman Matloff", title = "A Modified Random Perturbation Method for Database Security", journal = j-TODS, volume = "19", number = "1", pages = "47--63", month = mar, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-1/p47-tendick/p47-tendick.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-1/p47-tendick/; http://www.acm.org/pubs/toc/Abstracts/tods/174641.html", abstract = "The random data perturbation (RDP) method of preserving the privacy of individual records in a statistical database is discussed. In particular, it is shown that if confidential attributes are allowed as query-defining variables, severe biases may result in responses to queries. It is also shown that even if query definition through confidential variables is {\em not\/} allowed, biases can still occur in responses to queries such as those involving proportions or counts. In either case, serious distortions may occur in user statistical analyses. A modified version of RDP is presented, in the form of a query adjustment procedure and specialized perturbation structure which will produce unbiased results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Security", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "bias; correlation; design; noise addition; random perturbation method; security", subject = "{\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf D.4.6}: Software, OPERATING SYSTEMS, Security and Protection, Access controls. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Clifford:1994:CHR, author = "James Clifford and Albert Croker and Alexander Tuzhilin", title = "On Completeness of Historical Relational Query Languages", journal = j-TODS, volume = "19", number = "1", pages = "64--116", month = mar, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-1/p64-clifford/p64-clifford.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-1/p64-clifford/; http://www.acm.org/pubs/toc/Abstracts/tods/174642.html", abstract = "Numerous proposals for extending the relational data model to incorporate the temporal dimension of data have appeared in the past several years. These proposals have differed considerably in the way that the temporal dimension has been incorporated both into the {\em structure\/} of the extended relations of these temporal models and into the extended relational {\em algebra\/} or {\em calculus\/} that they define. Because of these differences, it has been difficult to compare the proposed models and to make judgments as to which of them might in some sense be equivalent or even {\em better}. 
In this paper we define {\em temporally grouped\/} and {\em temporally ungrouped\/} historical data models and propose two notions of {\em historical relational completeness}, analogous to Codd's notion of relational completeness, one for each type of model. We show that the temporally ungrouped models are less expressive than the grouped models, but demonstrate a technique for extending the ungrouped models with a grouping mechanism to capture the additional semantic power of temporal grouping. For the ungrouped models, we define three different languages, a logic with explicit reference to time, a temporal logic, and a temporal algebra, and motivate our choice for the first of these as the basis for completeness for these models. For the grouped models, we define a many-sorted logic with variables over ordinary values, historical values, and times. Finally, we demonstrate the equivalence of this grouped calculus and the ungrouped calculus extended with a grouping mechanism. We believe the classification of historical data models into grouped and ungrouped models provides a useful framework for the comparison of models in the literature, and furthermore, the exposition of equivalent languages for each type provides reasonable standards for common, and minimal, notions of historical relational completeness.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "completeness; historical databases; languages; query languages; relational model; temporal databases; temporal grouping; temporal logic; theory", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. 
{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models.", } @Article{Salem:1994:AL, author = "Kenneth Salem and H{\'e}ctor Garc{\'\i}a-Molina and Jeannie Shands", title = "Altruistic Locking", journal = j-TODS, volume = "19", number = "1", pages = "117--165", month = mar, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "Database/Graefe.bib; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-1/p117-salem/p117-salem.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-1/p117-salem/; http://www.acm.org/pubs/toc/Abstracts/tods/174639.html", abstract = "Long-lived transactions (LLTs) hold on to database resources for relatively long periods of time, significantly delaying the completion of shorter and more common transactions. To alleviate this problem we propose an extension to two-phase locking, called altruistic locking, whereby LLTs can release their locks early. Transactions that access this released data are said to run in the wake of the LLT and must follow special locking rules. Like two-phase locking, altruistic locking is easy to implement and guarantees serializability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; atomicity; locking; performance; scheduling; serializability; theory, concurrency control ``wake'' of a single transaction TODS", subject = "{\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Deadlock avoidance. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. 
{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Rosenthal:1994:TTR, author = "Arnon Rosenthal and David Reiner", title = "Tools and Transformations --- Rigorous and Otherwise --- for Practical Database Design", journal = j-TODS, volume = "19", number = "2", pages = "167--211", month = jun, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-2/p167-rosenthal/p167-rosenthal.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-2/p167-rosenthal/; http://www.acm.org/pubs/toc/Abstracts/tods/176568.html", abstract = "We describe the tools and theory of a comprehensive system for database design, and show how they work together to support multiple conceptual and logical design processes. The Database Design and Evaluation Workbench (DDEW) system uses a rigorous, information-content-preserving approach to schema transformation, but combines it with heuristics, guess work, and user interactions. The main contribution lies in illustrating how theory was adapted to a practical system, and how the consistency and power of a design system can be increased by use of theory.\par First, we explain why a design system needs multiple data models, and how implementation over a unified underlying model reduces redundancy and inconsistency. Second, we present a core set of small but fundamental algorithms that rearrange a schema without changing its information content. From these reusable components, we easily built larger tools and transformations that were still formally justified. Third, we describe heuristic tools that attempt to improve a schema, often by adding missing information. 
In these tools, unreliable techniques such as normalization and relationship inference are bolstered by system-guided user interactions to remove errors. We present a rigorous criterion for identifying unnecessary relationships, and discuss an interactive view integrator. Last, we examine the relevance of database theory to building these practically motivated tools and contrast the paradigms of system builders with those of theoreticians.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "applications of database theory; computer-aided software engineering; data model translation; database design; database equivalence; design; design heuristics; entity-relationship model; heuristics; normalization; theory; view integration", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf D.2.2}: Software, SOFTWARE ENGINEERING, Tools and Techniques, Programmer workbench. {\bf D.2.2}: Software, SOFTWARE ENGINEERING, Tools and Techniques, Software libraries. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms.", } @Article{Bright:1994:ARS, author = "M. W. Bright and A. R. Hurson and S. 
Pakzad", title = "Automated Resolution of Semantic Heterogeneity in Multidatabases", journal = j-TODS, volume = "19", number = "2", pages = "212--253", month = jun, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-2/p212-bright/p212-bright.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-2/p212-bright/; http://www.acm.org/pubs/toc/Abstracts/tods/176569.html", abstract = "A multidatabase system provides integrated access to heterogeneous, autonomous local databases in a distributed system. An important problem in current multidatabase systems is identification of semantically similar data in different local databases. The Summary Schemas Model (SSM) is proposed as an extension to multidatabase systems to aid in semantic identification. The SSM uses a global data structure to abstract the information available in a multidatabase system. This abstracted form allows users to use their own terms (imprecise queries) when accessing data rather than being forced to use system-specified terms. The system uses the global data structure to match the user's terms to the semantically closest available system terms. A simulation of the SSM is presented to compare imprecise-query processing with corresponding query-processing costs in a standard multidatabase system. The costs and benefits of the SSM are discussed, and future research directions are presented.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; federated database; imprecise queries; multidatabase; performance; schemas; semantic heterogeneity; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf H.2.5}: Information Systems, DATABASE MANAGEMENT, Heterogeneous Databases.", } @Article{Sciore:1994:USV, author = "Edward Sciore and Michael Siegel and Arnon Rosenthal", title = "Using Semantic Values to Facilitate Interoperability Among Heterogeneous Information Systems", journal = j-TODS, volume = "19", number = "2", pages = "254--290", month = jun, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-2/p254-sciore/p254-sciore.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-2/p254-sciore/; http://www.acm.org/pubs/toc/Abstracts/tods/176570.html", abstract = "Large organizations need to exchange information among many separately developed systems. In order for this exchange to be useful, the individual systems must agree on the meaning of their exchanged data. That is, the organization must ensure {\em semantic interoperability}. This paper provides a theory of {\em semantic values\/} as a unit of exchange that facilitates semantic interoperability between heterogeneous information systems. We show how semantic values can either be stored explicitly or be defined by {\em environments}. 
A system architecture is presented that allows autonomous components to share semantic values. The key component in this architecture is called the {\em context mediator}, whose job is to identify and construct the semantic values being sent, to determine when the exchange is meaningful, and to convert the semantic values to the form required by the receiver.\par Our theory is then applied to the relational model. We provide an interpretation of standard SQL queries in which context conversions and manipulations are transparent to the user. We also introduce an extension of SQL, called Context-SQL (C-SQL), in which the {\em context\/} of a semantic value can be explicitly accessed and updated. Finally, we describe the implementation of a prototype context mediator for a relational C-SQL system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Management", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; languages; management", subject = "{\bf H.2.5}: Information Systems, DATABASE MANAGEMENT, Heterogeneous Databases, Data translation. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems.", } @Article{Subrahmanian:1994:AKB, author = "V. S. 
Subrahmanian", title = "Amalgamating Knowledge Bases", journal = j-TODS, volume = "19", number = "2", pages = "291--331", month = jun, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-2/p291-subrahmanian/p291-subrahmanian.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-2/p291-subrahmanian/; http://www.acm.org/pubs/toc/Abstracts/tods/176571.html", abstract = "The integration of knowledge from multiple sources is an important aspect of automated reasoning systems. When different knowledge bases are used to store knowledge provided by multiple sources, we are faced with the problem of integrating multiple knowledge bases: Under these circumstances, we are also confronted with the prospect of inconsistency. In this paper we present a uniform theoretical framework, based on annotated logics, for amalgamating multiple knowledge bases when these knowledge bases (possibly) contain inconsistencies, uncertainties, and nonmonotonic modes of negation. We show that annotated logics may be used, with some modifications, to mediate between different knowledge bases. The multiple knowledge bases are amalgamated by a transformation of the individual knowledge bases into new annotated logic programs, together with the addition of a new axiom scheme. We characterize the declarative semantics of such amalgamated knowledge bases and study how the semantics of the amalgam is related to the semantics of the individual knowledge bases being combined.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "amalgamated knowledge bases; annotated logics; languages", subject = "{\bf I.2.4}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods, Representations (procedural and rule-based). {\bf H.2.5}: Information Systems, DATABASE MANAGEMENT, Heterogeneous Databases, Data translation. {\bf I.2.4}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods, Representation languages.", } @Article{Yan:1994:ISS, author = "Tak W. Yan and H{\'e}ctor Garc{\'\i}a-Molina", title = "Index Structures for Selective Dissemination of Information Under the {Boolean} Model", journal = j-TODS, volume = "19", number = "2", pages = "332--364", month = jun, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-2/p332-yan/p332-yan.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-2/p332-yan/; http://www.acm.org/pubs/toc/Abstracts/tods/176573.html", abstract = "The number, size, and user population of bibliographic and full-text document databases are rapidly growing. With a high document arrival rate, it becomes essential for users of such databases to have access to the very latest documents; yet the high document arrival rate also makes it difficult for users to keep themselves updated. It is desirable to allow users to submit profiles, i.e., queries that are constantly evaluated, so that they will be automatically informed of new additions that may be of interest. 
Such service is traditionally called Selective Dissemination of Information (SDI).\par The high document arrival rate, the huge number of users, and the timeliness requirement of the service pose a challenge in achieving efficient SDI. In this article, we propose several index structures for indexing profiles and algorithms that efficiently match documents against large number of profiles. We also present analysis and simulation results to compare their performance under different scenarios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; performance", subject = "{\bf H.3.1}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Content Analysis and Indexing, Indexing methods. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.3.4}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Systems and Software, Current awareness systems (selective dissemination of information --- SDI).", } @Article{Ceri:1994:AGP, author = "Stefano Ceri and Piero Fraternali and Stefano Paraboschi and Letizia Tanca", title = "Automatic Generation of Production Rules for Integrity Maintenance", journal = j-TODS, volume = "19", number = "3", pages = "367--422", month = sep, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-3/p367-ceri/p367-ceri.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-3/p367-ceri/; http://www.acm.org/pubs/toc/Abstracts/tods/185828.html", abstract = "In this article we present an approach to integrity maintenance, 
consisting of automatically generating production rules for integrity enforcement. Constraints are expressed as particular formulas of Domain Relational Calculus; they are automatically translated into a set of repair actions, encoded as production rules of an active database system. Production rules may be redundant (they enforce the same constraint in different ways) and conflicting (because repairing one constraint may cause the violation of another constraint). Thus, it is necessary to develop techniques for analyzing the properties of the set of active rules and for ensuring that any computation of production rules after any incorrect transaction terminates and produces a consistent database state.\par Along these guidelines, we describe a specific architecture for constraint definition and enforcement. The components of the architecture include a {\em Rule Generator}, for producing all possible repair actions, and a {\em Rule Analyzer and Selector}, for producing a collection of production rules such that their execution after an incorrect transaction always terminates in a consistent state (possibly by rolling back the transaction); moreover, the needs of applications are modeled, so that integrity-enforcing rules reach the final state that better represents the original intentions of the transaction's supplier. Specific input from the designer can also drive the process and integrate or modify the rules generated automatically by the method. Experimental results of a prototype implementation of the proposed architecture are also described.", acknowledgement = ack-nhfb, affiliation = "Dipartimento di Elettronica, Politecnico di Milano, Italy", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Experimentation; Management", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "automatic generation of production rules; experimentation; management", subject = "{\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf I.2.2}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Automatic Programming.", } @Article{Polyzois:1994:ERB, author = "Christos A. Polyzois and H{\'e}ctor Garc{\'\i}a-Molina", title = "Evaluation of Remote Backup Algorithms for Transaction-Processing Systems", journal = j-TODS, volume = "19", number = "3", pages = "423--449", month = sep, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-3/p423-polyzois/p423-polyzois.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-3/p423-polyzois/; http://www.acm.org/pubs/toc/Abstracts/tods/185836.html", abstract = "A remote backup is a copy of a primary database maintained at a geographically separate location and is used to increase data availability. Remote backup systems are typically log-based and can be classified into 2-safe and 1-safe, depending on whether transactions commit at both sites simultaneously or first commit at the primary and are later propagated to the backup. We have built an experimental database system on which we evaluated the performance of the epoch and the dependency reconstruction algorithms, two 1-safe algorithms we have developed. We compared the 1-safe with the 2-safe approach under various conditions.", acknowledgement = ack-nhfb, affiliation = "IBM Thomas J. Watson Res. 
Center, Yorktown Heights, NY, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; disaster recovery; hot spare; hot standby; performance; reliability; remote backup", subject = "{\bf H.2.7}: Information Systems, DATABASE MANAGEMENT, Database Administration, Logging and recovery. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf D.4.5}: Software, OPERATING SYSTEMS, Reliability, Backup procedures. {\bf D.4.5}: Software, OPERATING SYSTEMS, Reliability, Fault-tolerance.", } @Article{Chrysanthis:1994:SET, author = "Panos K. Chrysanthis and Krithi Ramamritham", title = "Synthesis of Extended Transaction Models Using {ACTA}", journal = j-TODS, volume = "19", number = "3", pages = "450--491", month = sep, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-3/p450-chrysanthis/p450-chrysanthis.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-3/p450-chrysanthis/; http://www.acm.org/pubs/toc/Abstracts/tods/185843.html", abstract = "ACTA is a comprehensive transaction framework that facilitates the formal description of properties of extended transaction models. 
Specifically, using ACTA, one can specify and reason about (1) the effects of transactions on objects and (2) the interactions between transactions. This article presents ACTA {\em as a tool for the synthesis of extended transaction models}, one which supports the development and analysis of new extended transaction models in a systematic manner. Here, this is demonstrated by deriving new transaction definitions (1) by modifying the specifications of existing transaction models, (2) by combining the specifications of existing models, and (3) by starting from first principles. To exemplify the first, new models are synthesized from {\em atomic transactions\/} and {\em join transactions}. To illustrate the second, we synthesize a model that combines aspects of the {\em nested\/}- and {\em split-transaction\/} models. We demonstrate the latter by deriving the specification of an {\em open-nested-transaction\/} model from high-level requirements.", acknowledgement = ack-nhfb, affiliation = "Pittsburgh Univ., PA, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Reliability; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; correctness criteria; design; reliability; semantics; serializability theory; theory; transaction models; verification", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf D.2.4}: Software, SOFTWARE ENGINEERING, Program Verification, Correctness proofs. {\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Abstract data types. {\bf D.4.1}: Software, OPERATING SYSTEMS, Process Management, Concurrency. 
{\bf F.3.1}: Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs, Assertions. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems.", } @Article{Korth:1994:FAC, author = "Henry F. Korth and Greg Speegle", title = "Formal Aspects of Concurrency Control in Long-Duration Transaction Systems Using the {NT\slash PV} model", journal = j-TODS, volume = "19", number = "3", pages = "492--535", month = sep, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-3/p492-korth/p492-korth.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-3/p492-korth/; http://www.acm.org/pubs/toc/Abstracts/tods/185854.html", abstract = "In the typical database system, an execution is correct if it is equivalent to some serial execution. This criterion, called serializability, is unacceptable for new database applications which require long-duration transactions. We present a new transaction model which allows correctness criteria more suitable for these applications. This model combines three enhancements to the standard model: nested transactions, explicit predicates, and multiple versions. These features yield the name of the new model, nested transactions with predicates and versions, or NT/PV.\par The modular nature of the NT/PV model allows a straightforward representation of simple systems. It also provides a formal framework for describing complex interactions. The most complex interactions the model allows can be captured by a protocol which exploits all of the semantics available to the NT/PV model. 
An example of these interactions is shown in a CASE application. The example shows how a system based on the NT/PV model is superior to both standard database techniques and unrestricted systems in both correctness and performance.", acknowledgement = ack-nhfb, affiliation = "Matsushita Inf. Technol. Lab., Panasonic Technol. Inc., Princeton, NJ, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control protocol; semantic information; theory; transaction processing", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Goldman:1994:QCN, author = "Kenneth J. Goldman and Nancy Lynch", title = "Quorum Consensus in Nested-Transaction Systems", journal = j-TODS, volume = "19", number = "4", pages = "537--585", month = dec, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-4/p537-goldman/p537-goldman.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-4/p537-goldman/; http://www.acm.org/pubs/toc/Abstracts/tods/195666.html", abstract = "Gifford's Quorum Consensus algorithm for data replication is studied in the context of nested transactions and transaction failures (aborts), and a fully developed reconfiguration strategy is presented. A formal description of the algorithm is presented using the Input/Output automaton model for nested-transaction systems due to Lynch and Merritt. In this description, the algorithm itself is described in terms of nested transactions. 
The formal description is used to construct a complete proof of correctness that uses standard assertional techniques, is based on a natural correctness condition, and takes advantage of modularity that arises from describing the algorithm as nested transactions. The proof is accomplished hierarchically, showing that a fully replicated reconfigurable system ``simulates'' an intermediate replicated system, and that the intermediate system simulates an unreplicated system. The presentation and proof treat issues of data replication entirely separately from issues of concurrency control and recovery.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Washington Univ., St. Louis, MO, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; concurrency control; data replication; hierarchical proofs; I/O automata; nested transactions; quorum consensus; theory; verification", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems.", } @Article{Krishnakumar:1994:BIT, author = "Narayanan Krishnakumar and Arthur J. 
Bernstein", title = "Bounded Ignorance: a Technique for Increasing Concurrency in a Replicated System", journal = j-TODS, volume = "19", number = "4", pages = "586--625", month = dec, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-4/p586-krishnakumar/p586-krishnakumar.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-4/p586-krishnakumar/; http://www.acm.org/pubs/toc/Abstracts/tods/195670.html", abstract = "Databases are replicated to improve performance and availability. The notion of correctness that has commonly been adopted for concurrent access by transactions to shared, possibly replicated, data is serializability. However, serializability may be impractical in high-performance applications since it imposes too stringent a restriction on concurrency. When serializability is relaxed, the integrity constraints describing the data may be violated. By allowing bounded violations of the integrity constraints, however, we are able to increase the concurrency of transactions that execute in a replicated environment. In this article, we introduce the notion of an {\em N-ignorant\/} transaction, which is a transaction that may be ignorant of the results of at most $N$ prior transactions. A system in which all transactions are {\em N-ignorant\/} can have an $N$ + 1-fold increase in concurrency over serializable systems, at the expense of bounded violations of its integrity constraints. We present algorithms for implementing replicated databases in {\em N-ignorant\/} systems. 
We then provide constructive methods for calculating the reachable states in such systems, given the value of $N$, so that one may assess the maximum liability that is incurred in allowing constraint violation. Finally, we generalize the notion of {\em N-ignorance\/} to a matrix of ignorance for the purpose of higher concurrency.", acknowledgement = ack-nhfb, affiliation = "State Univ. of New York, Stony Brook, NY, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; concurrency control; integrity constraints; performance; reachability analysis; replication; serializability; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. 
{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems.", } @Article{Winslett:1994:FQL, author = "Marianne Winslett and Kenneth Smith and Xiaolei Qian", title = "Formal Query Languages for Secure Relational Databases", journal = j-TODS, volume = "19", number = "4", pages = "626--662", month = dec, year = "1994", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1994-19-4/p626-winslett/p626-winslett.pdf; http://www.acm.org/pubs/citations/journals/tods/1994-19-4/p626-winslett/; http://www.acm.org/pubs/toc/Abstracts/tods/195675.html", abstract = "The addition of stringent security specifications to the list of requirements for an application poses many new problems in DBMS design and implementation, as well as database design, use, and maintenance. Tight security requirements, such as those that result in silent masking or withholding of true information from a user or the introduction of false information into query answers, also raise fundamental questions about the meaning of the database and the semantics of accompanying query languages. In this paper, we propose a belief-based semantics for secure databases, which provides a semantics for databases that can ``lie'' about the state of the world, or about their knowledge about the state of the world, in order to preserve security. This kind of semantics can be used as a helpful retrofit for the proposals for a ``multilevel secure'' database model (a particularly stringent form of security), and may be useful for less restrictive security policies as well. 
We also propose a family of query languages for multilevel secure relational database applications, and base the semantics of those languages on our semantics for secure databases. Our query languages are free of the semantic problems associated with use of ordinary SQL in a multilevel secure context, and should be easy for users to understand and employ.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Illinois Univ., Urbana, IL, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Security", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "formal security models; information security; multilevel secure databases; security", subject = "{\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf K.6.5}: Computing Milieux, MANAGEMENT OF COMPUTING AND INFORMATION SYSTEMS, Security and Protection. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages.", } @Article{Kim:1995:CS, author = "Won Kim", title = "Charter and scope", journal = j-TODS, volume = "20", number = "1", pages = "1--2", month = mar, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 10 13:00:12 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Aiken:1995:SAT, author = "Alexander Aiken and Joseph M. 
Hellerstein and Jennifer Widom", title = "Static Analysis Techniques for Predicting the Behavior of Active Database Rules", journal = j-TODS, volume = "20", number = "1", pages = "3--41", month = mar, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1995-20-1/p3-aiken/p3-aiken.pdf; http://www.acm.org/pubs/citations/journals/tods/1995-20-1/p3-aiken/; http://www.acm.org/pubs/toc/Abstracts/tods/202107.html", abstract = "This article gives methods for statically analyzing sets of active database rules to determine if the rules are (1) guaranteed to terminate, (2) guaranteed to produce a unique final database state, and (3) guaranteed to produce a unique stream of observable actions. If the analysis determines that one of these properties is not guaranteed, it isolates the rules responsible for the problem and determines criteria that, if satisfied, guarantee the property. The analysis methods are presented in the context of the {\em Starburst Rule System}.", acknowledgement = ack-nhfb, affiliation = "California Univ., Berkeley, CA, USA", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Management; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "active database systems; algorithms; confluence; database rule processing; design; management; static analysis; termination; verification", subject = "{\bf H.2.m}: Information Systems, DATABASE MANAGEMENT, Miscellaneous. {\bf D.2.4}: Software, SOFTWARE ENGINEERING, Program Verification, Validation. 
{\bf I.2.5}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Programming Languages and Software.", } @Article{Chen:1995:DUR, author = "Weidong Chen", title = "Declarative Updates of Relational Databases", journal = j-TODS, volume = "20", number = "1", pages = "42--70", month = mar, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1995-20-1/p42-chen/p42-chen.pdf; http://www.acm.org/pubs/citations/journals/tods/1995-20-1/p42-chen/; http://www.acm.org/pubs/toc/Abstracts/tods/202110.html", abstract = "This article presents a declarative language, called {\em update calculus}, of relational database updates. A formula in update calculus involves conditions for the current database, as well as assertions about a new database. Logical connectives and quantifiers become constructors of complex updates, offering flexible specifications of database transformations. Update calculus can express all nondeterministic database transformations that are polynomial time.\par For set-at-a-time evaluation of updates, we present a corresponding {\em update algebra}. Existing techniques of query processing can be incorporated into update evaluation. We show that updates in update calculus can be translated into expressions in update algebra and vice versa.", acknowledgement = ack-nhfb, affiliation = "Southern Methodist Univ., Dallas, TX, USA", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database languages; database updates; expressive power; languages; theory; update algebra; update calculus", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML).", } @Article{Jagadish:1995:IDM, author = "H. V. Jagadish", title = "The {INCINERATE} data model", journal = j-TODS, volume = "20", number = "1", pages = "71--110", month = mar, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1995-20-1/p71-jagadish/p71-jagadish.pdf; http://www.acm.org/pubs/citations/journals/tods/1995-20-1/p71-jagadish/; http://www.acm.org/pubs/toc/Abstracts/tods/202113.html", abstract = "In this article, we present an extended relational algebra with universally or existentially quantified classes as attribute values. The proposed extension can greatly enhance the expressive power of relational systems, and significantly reduce the size of a database, at small additional computational cost. We also show how the proposed extensions can be built on top of a standard relational database system.", acknowledgement = ack-nhfb, affiliation = "AT\&T Bell Labs., Murray Hill, NJ, USA", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models.", } @Article{Baekgaard:1995:ICN, author = "Lars Baekgaard and Leo Mark", title = "Incremental Computation of Nested Relational Query Expressions", journal = j-TODS, volume = "20", number = "2", pages = "111--148", month = jun, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1995-20-2/p111-baekgaard/p111-baekgaard.pdf; http://www.acm.org/pubs/citations/journals/tods/1995-20-2/p111-baekgaard/; http://www.acm.org/pubs/toc/Abstracts/tods/210198.html", abstract = "Efficient algorithms for incrementally computing nested query expressions do not exist. Nested query expressions are query expressions in which selection/join predicates contain subqueries. In order to respond to this problem, we propose a two-step strategy for incrementally computing nested query expressions. In step (1), the query expression is transformed into an equivalent unnested flat query expression. In step (2), the flat query expression is incrementally computed. To support step (1), we have developed a very concise algebra-to-algebra transformation algorithm, and we have formally proved its correctness. The flat query expressions resulting from the transformation make intensive use of the relational set-difference operator. To support step (2), we present and analyze an efficient algorithm for incrementally computing set differences based on view pointer caches. 
When combined with existing incremental algorithms for SPJ queries, our incremental set-difference algorithm can be used to compute the unnested flat query expressions efficiently. It is important to notice that without our incremental set-difference algorithm the existing incremental algorithms for SPJ queries are useless for any query involving the set-difference operator, including queries that are not the result of unnesting nested queries.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; incremental computation; nested query expressions; performance; set differences; unnesting; view pointer caches", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Search process. {\bf H.3.3}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval, Query formulation. 
{\bf E.5}: Data, FILES, Sorting/searching.", } @Article{Chomicki:1995:ECT, author = "Jan Chomicki", title = "Efficient Checking of Temporal Integrity Constraints Using Bounded History Encoding", journal = j-TODS, volume = "20", number = "2", pages = "149--186", month = jun, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1995-20-2/p149-chomicki/p149-chomicki.pdf; http://www.acm.org/pubs/citations/journals/tods/1995-20-2/p149-chomicki/; http://www.acm.org/pubs/toc/Abstracts/tods/210200.html", abstract = "We present an efficient implementation method for temporal integrity constraints formulated in Past Temporal Logic. Although the constraints can refer to past states of the database, their checking does not require that the entire database history be stored. Instead, every database state is extended with auxiliary relations that contain the historical information necessary for checking constraints. Auxiliary relations can be implemented as materialized relational views.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "active databases; algorithms; database integrity; integrity constraints; real-time databases; temporal databases; temporal logic; theory; triggers", subject = "{\bf H.2.0}: Information Systems, DATABASE MANAGEMENT, General, Security, integrity, and protection. {\bf F.4.1}: Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Mathematical Logic. 
{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data description languages (DDL).", } @Article{Graefe:1995:FAU, author = "Goetz Graefe and Richard L. Cole", title = "Fast Algorithms for Universal Quantification in Large Databases", journal = j-TODS, volume = "20", number = "2", pages = "187--236", month = jun, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1995-20-2/p187-graefe/p187-graefe.pdf; http://www.acm.org/pubs/citations/journals/tods/1995-20-2/p187-graefe/; http://www.acm.org/pubs/toc/Abstracts/tods/210202.html", abstract = "Universal quantification is not supported directly in most database systems despite the fact that it adds significant power to a system's query processing and inference capabilities, in particular for the analysis of many-to-many relationships and of set-valued attributes. One of the main reasons for this omission has been that universal quantification algorithms and their performance have not been explored for large databases. In this article, we describe and compare three known algorithms and one recently proposed algorithm for relational division, the algebra operator that embodies universal quantification. For each algorithm, we investigate the performance effects of explicit duplicate removal and referential integrity enforcement, variants for inputs larger than memory, and parallel execution strategies. Analytical and experimental performance comparisons illustrate the substantial differences among the algorithms. 
Moreover, comparisons demonstrate that the recently proposed division algorithm evaluates a universal quantification predicate over two relations as fast as hash (semi-) join evaluates an existential quantification predicate over the same relations. Thus, existential and universal quantification can be supported with equal efficiency by adding the recently proposed algorithm to a query evaluation system. A second result of our study is that universal quantification should be expressed directly in a database query language, because most query optimizers do not recognize the rather indirect formulations available in SQL as relational division and therefore produce very poor evaluation plans for many universal quantification queries.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Experimentation", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; experimentation", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf E.5}: Data, FILES. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. 
{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML).", } @Article{Chen:1995:QED, author = "Weidong Chen", title = "Query Evaluation in Deductive Databases with Alternating Fixpoint Semantics", journal = j-TODS, volume = "20", number = "3", pages = "239--287", month = sep, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", MRclass = "68P15 (68P20)", MRnumber = "96g:68024", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1995-20-3/p239-chen/p239-chen.pdf; http://www.acm.org/pubs/citations/journals/tods/1995-20-3/p239-chen/; http://www.acm.org/pubs/toc/Abstracts/tods/211416.html", abstract = "First-order formulas allow natural descriptions of queries and rules. Van Gelder's alternating fixpoint semantics extends the well-founded semantics of normal logic programs to general logic programs with arbitrary first-order formulas in rule bodies. However, an implementation of general logic programs through the standard translation into normal logic programs does not preserve the alternating fixpoint semantics. This paper presents a direct method for goal-oriented query evaluation of general logic programs. Every general logic program is first transformed into a normal form where the body of each rule is either an existential conjunction of literals or a universal disjunction of literals. Techniques of memoing and loop checking are incorporated so that termination and polynomial-time data complexity are guaranteed for deductive databases (or function-free programs). Results of the soundness and search space completeness are established.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "alternating fixpoint; deductive databases; negation; predicate logic; query evaluations; SLG resolution; theory; verification", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Logic programming.", } @Article{Ioannidis:1995:CCQ, author = "Yannis E. Ioannidis and Raghu Ramakrishnan", title = "Containment of Conjunctive Queries: Beyond Relations as Sets", journal = j-TODS, volume = "20", number = "3", pages = "288--324", month = sep, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1995-20-3/p288-ioannidis/p288-ioannidis.pdf; http://www.acm.org/pubs/citations/journals/tods/1995-20-3/p288-ioannidis/; http://www.acm.org/pubs/toc/Abstracts/tods/211419.html", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; conjunctive queries; equivalence; languages; multisets; query containment; query optimization; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf F.0}: Theory of Computation, GENERAL. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. 
{\bf H.1.1}: Information Systems, MODELS AND PRINCIPLES, Systems and Information Theory.", } @Article{Shasha:1995:TCA, author = "Dennis Shasha and Fran{\c{c}}ois Llirbat and Eric Simon and Patrick Valduriez", title = "Transaction Chopping: Algorithms and Performance Studies", journal = j-TODS, volume = "20", number = "3", pages = "325--363", month = sep, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1995-20-3/p325-shasha/p325-shasha.pdf; http://www.acm.org/pubs/citations/journals/tods/1995-20-3/p325-shasha/; http://www.acm.org/pubs/toc/Abstracts/tods/211427.html", abstract = "Chopping transactions into pieces is good for performance but may lead to nonserializable executions. Many researchers have reacted to this fact by either inventing new concurrency-control mechanisms, weakening serializability, or both. We adopt a different approach. We assume a user who\par ---has access only to user-level tools such as (1) choosing isolation degrees 1--4, (2) the ability to execute a portion of a transaction using multiversion read consistency, and (3) the ability to reorder the instructions in transaction programs; and\par ---knows the set of transactions that may run during a certain interval (users are likely to have such knowledge for on-line or real-time transactional applications).\par Given this information, our algorithm finds the finest chopping of a set of transactions TranSet with the following property: {\em If the pieces of the chopping execute serializably, then TranSet executes serializably}. This permits users to obtain more concurrency while preserving correctness.
Besides obtaining more intertransaction concurrency, chopping transactions in this way can enhance intratransaction parallelism.\par The algorithm is inexpensive, running in $ O(n \times (e + m)) $ time, once conflicts are identified, using a naive implementation, where $n$ is the number of concurrent transactions in the interval, $e$ is the number of edges in the conflict graph among the transactions, and $m$ is the maximum number of accesses of any transaction. This makes it feasible to add as a tuning knob to real systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; locking; multidatabase; performance; serializability; tuning", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf D.4.8}: Software, OPERATING SYSTEMS, Performance, Simulation. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf I.6.8}: Computing Methodologies, SIMULATION AND MODELING, Types of Simulation, Discrete event.", } @Article{Chen:1995:EML, author = "I.-Min A.
Chen and Richard Hull and Dennis McLeod", title = "An Execution Model for Limited Ambiguity Rules and its Application to Derived Data Update", journal = j-TODS, volume = "20", number = "4", pages = "365--413", month = dec, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1995-20-4/p365-chen/p365-chen.pdf; http://www.acm.org/pubs/citations/journals/tods/1995-20-4/p365-chen/; http://www.acm.org/pubs/toc/Abstracts/tods/219039.html", abstract = "A novel execution model for rule application in active databases is developed and applied to the problem of updating derived data in a database represented using a semantic, object-based database model. The execution model is based on the use of ``limited ambiguity rules'' (LARs), which permit disjunction in rule actions. The execution model essentially performs a breadth-first exploration of alternative extensions of a user-requested update. Given an object-based database schema, both integrity constraints and specifications of derived classes and attributes are compiled into a family of limited ambiguity rules. A theoretical analysis shows that the approach is sound: the execution model returns all valid ``completions'' of a user-requested update, or terminates with an appropriate error notification. The complexity of the approach in connection with derived data update is considered.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Languages; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "active database systems; algorithms; deltas on database states; derived data; design; languages; limited ambiguity rules; management; semantic data models; theory; update propagation", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.m}: Information Systems, DATABASE MANAGEMENT, Miscellaneous.", } @Article{Fraternali:1995:SAD, author = "Piero Fraternali and Letizia Tanca", title = "A Structured Approach for the Definition of the Semantics of Active Databases", journal = j-TODS, volume = "20", number = "4", pages = "414--471", month = dec, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1995-20-4/p414-fraternali/p414-fraternali.pdf; http://www.acm.org/pubs/citations/journals/tods/1995-20-4/p414-fraternali/; http://www.acm.org/pubs/toc/Abstracts/tods/219042.html", abstract = "Active DBMSs couple database technology with rule-based programming to achieve the capability of reaction to database (and possibly external) stimuli, called {\em events}. The reactive capabilities of active databases are useful for a wide spectrum of applications, including security, view materialization, integrity checking and enforcement, or heterogeneous database integration, which makes this technology very promising for the near future. 
An active database system consists of a (passive) database and a set of {\em active rules\/}; the most popular form of active rule is the so-called {\em event-condition-action\/} (ECA) rule, which specifies an action to be executed upon the occurrence of one or more events, provided that a condition holds. Several active database systems and prototypes have been designed and partially or completely implemented. Unfortunately, they have been designed in a totally independent way, without the support of a common theory dictating the semantics of ECA rules, and thus often show different behaviors for rules with a similar form. In this article we consider a number of different possible options in the behavior of an active DBMS, based on a broad analysis of some of the best known implemented systems and prototypes. We encode these options in a user-readable form, called {\em Extended ECA}. A rule from any existing system can be rewritten in this formalism making all the semantic choices apparent. Then an EECA rule can be automatically translated into an internal (less readable) format, based on a logical style, which is called {\em core\/} format: the execution semantics of core rules is specified as the fixpoint of a simple transformation involving core rules. As an important premise to this research, a semantics for database updates and transactions has also been established, with respect to a notion of state that comprises both data and events. The article also presents an extensive bibliography on the subject of active databases.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "active database systems; database rule processing; events; fixpoint semantics; languages; rules; semantics; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design. 
{\bf F.3.2}: Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Semantics of Programming Languages, Operational semantics. {\bf H.2.m}: Information Systems, DATABASE MANAGEMENT, Miscellaneous. {\bf I.2.5}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Programming Languages and Software.", } @Article{Srivastava:1995:SOD, author = "Divesh Srivastava and S. Sudarshan and Raghu Ramakrishnan and Jeffrey F. Naughton", title = "Space Optimization in Deductive Databases", journal = j-TODS, volume = "20", number = "4", pages = "472--516", month = dec, year = "1995", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1995-20-4/p472-srivastava/p472-srivastava.pdf; http://www.acm.org/pubs/citations/journals/tods/1995-20-4/p472-srivastava/; http://www.acm.org/pubs/toc/Abstracts/tods/219056.html", abstract = "In the bottom-up evaluation of logic programs and recursively defined views on databases, all generated facts are usually assumed to be stored until the end of the evaluation. Discarding facts during the evaluation, however, can considerably improve the efficiency of the evaluation: the space needed to evaluate the program, the I/O costs, the costs of maintaining and accessing indices, and the cost of eliminating duplicates may all be reduced. Given an evaluation method that is sound, complete, and does not repeat derivation steps, we consider how facts can be discarded during the evaluation without compromising these properties. 
We show that every such space optimization method has certain components, the first to ensure soundness and completeness, the second to avoid redundancy (i.e., repetition of derivations), and the third to reduce ``fact lifetimes'' (i.e., the time period for which each fact must be retained during evaluation). We present new techniques based on providing bounds on the number of derivations and uses of facts, and using monotonicity constraints for each of the first two components, and provide novel synchronization techniques for the third component of a space optimization method. We describe how techniques for each of the three components can be combined in practice to obtain a space optimization method for a program. Our results are also of importance in applications such as sequence querying, and in active databases where triggers are defined over multiple ``events.''", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; bottom-up query evaluation deductive database systems; discarding facts; languages; logic programming; theory", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. 
{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages.", } @Article{Baralis:1996:MTA, author = "Elena Baralis and Stefano Ceri and Stefano Paraboschi", title = "Modularization Techniques for Active Rules Design", journal = j-TODS, volume = "21", number = "1", pages = "1--29", month = mar, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-1/p1-baralis/p1-baralis.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-1/p1-baralis/; http://www.acm.org/pubs/toc/Abstracts/tods/227605.html", abstract = "Active database systems can be used to establish and enforce data management policies. A large amount of the semantics that normally needs to be coded in application programs can be abstracted and assigned to active rules. This trend is sometimes called ``knowledge independence''; a nice consequence of achieving full knowledge independence is that data management policies can then effectively evolve just by modifying rules instead of application programs. Active rules, however, may be quite complex to understand and manage: rules react to arbitrary event sequences, they trigger each other, and sometimes the outcome of rule processing may depend on the order in which events occur or rules are scheduled. Although reasoning on a large collection of rules is very difficult, the task becomes more manageable when the rules are few. Therefore, we are convinced that modularization, similar to what happens in any software development process, is the key principle for designing active rules; however, this important notion has not been addressed so far.
This article introduces a modularization technique for active rules called stratification; it presents a theory of stratification and indicates how stratification can be practically applied. The emphasis of this article is on providing a solution to a very concrete and practical problem; therefore, our approach is illustrated by several examples.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "active database systems; database rule processing; design; modularization; static analysis; termination", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf D.2.1}: Software, SOFTWARE ENGINEERING, Requirements/Specifications, Methodologies. {\bf D.2.2}: Software, SOFTWARE ENGINEERING, Tools and Techniques, Modules and interfaces. {\bf H.2.8}: Information Systems, DATABASE MANAGEMENT, Database applications.", } @Article{Buneman:1996:PTI, author = "Peter Buneman and Atsushi Ohori", title = "Polymorphism and Type Inference in Database Programming", journal = j-TODS, volume = "21", number = "1", pages = "30--76", month = mar, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-1/p30-buneman/p30-buneman.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-1/p30-buneman/; http://www.acm.org/pubs/toc/Abstracts/tods/227609.html", abstract = "In order to find a static type system that adequately supports database languages, we need to express the most general type of a program that involves database operations. 
This can be achieved through an extension to the type system of ML that captures the polymorphic nature of field selection, together with a technique that generalizes relational operators to arbitrary data structures. The combination provides a statically typed language in which generalized relational databases may be cleanly represented as typed structures. As in ML, types are inferred, which relieves the programmer of making the type assertions that may be required in a complex database environment.\par These extensions may also be used to provide static polymorphic typechecking in object-oriented languages and databases. A problem that arises with object-oriented databases is the apparent need for dynamic typechecking when dealing with queries on heterogeneous collections of objects. An extension of the type system needed for generalized relational operations can also be used for manipulating collections of dynamically typed values in a statically typed language. A prototype language based on these ideas has been implemented. While it lacks a proper treatment of persistent data, it demonstrates that a wide variety of database structures can be cleanly represented in a polymorphic programming language.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "generalized relational algebra; inheritance; object-oriented databases; polymorphism; record calculus; theory; type inference", subject = "{\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Data types and structures. {\bf D.3.1}: Software, PROGRAMMING LANGUAGES, Formal Definitions and Theory. {\bf D.3.2}: Software, PROGRAMMING LANGUAGES, Language Classifications, Applicative languages. {\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Abstract data types.
{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Database (persistent) programming languages. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages.", } @Article{Mok:1996:NFP, author = "Wai Yin Mok and Yiu-Kai Ng and David W. Embley", title = "A Normal Form for Precisely Characterizing Redundancy in Nested Relations", journal = j-TODS, volume = "21", number = "1", pages = "77--106", month = mar, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-1/p77-mok/p77-mok.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-1/p77-mok/; http://www.acm.org/pubs/toc/Abstracts/tods/227612.html", abstract = "We give a straightforward definition for redundancy in individual nested relations and define a new normal form that precisely characterizes redundancy for nested relations. We base our definition of redundancy on an arbitrary set of functional and multivalued dependencies, and show that our definition of nested normal form generalizes standard relational normalization theory. In addition, we give a condition that can prevent an unwanted structural anomaly in nested relations, namely, embedded nested relations with at most one tuple. Like other normal forms, our nested normal form can serve as a guide for database design.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data redundancy; database design; design; functional and multivalued dependencies; nested normal form; nested relations; normalization theory; scheme trees; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms. {\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models.", } @Article{Mumick:1996:MC, author = "Inderpal Singh Mumick and Sheldon J. Finkelstein and Hamid Pirahesh and Raghu Ramakrishnan", title = "Magic conditions", journal = j-TODS, volume = "21", number = "1", pages = "107--155", month = mar, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-1/p107-mumick/p107-mumick.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-1/p107-mumick/; http://www.acm.org/pubs/toc/Abstracts/tods/227624.html", abstract = "Much recent work has focused on the bottom-up evaluation of Datalog programs [Bancilhon and Ramakrishnan 1988]. One approach, called magic-sets, is based on rewriting a logic program so that bottom-up fixpoint evaluation of the program avoids generation of irrelevant facts [Bancilhon et al. 1986; Beeri and Ramakrishnan 1987; Ramakrishnan 1991]. It was widely believed for some time that the principal application of the magic-sets technique is to restrict computation in recursive queries using equijoin predicates. We extend the magic-sets transformation to use predicates other than equality ($ X > 10 $, for example) in restricting computation. 
The resulting {\em ground magic-sets transformation\/} is an important step in developing an extended magic-sets transformation that has practical utility in ``real'' relational databases, not only for recursive queries, but for nonrecursive queries as well [Mumick et al. 1990b; Mumick 1991].", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Measurement; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; bottom-up evaluation; constraint logic programming; constraints; deductive databases; magic sets; measurement; query optimization; relational databases; Starburst; theory; verification", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Liu:1996:BBS, author = "Ling Liu and Robert Meersman", title = "The Building Blocks for Specifying Communication Behavior of Complex Objects: An Activity-Driven Approach", journal = j-TODS, volume = "21", number = "2", pages = "157--207", month = jun, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-2/p157-liu/p157-liu.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-2/p157-liu/; http://www.acm.org/pubs/toc/Abstracts/tods/232622.html", abstract = "Communication behavior represents dynamic evolution and cooperation of a group of objects in accomplishing a task. It is an important feature in object-oriented systems. 
We propose the concept of activity as a basic building block for declarative specification of communication behavior in object-oriented database systems, including the temporal ordering of message exchanges within object communication and the behavioral relationships between activity executions. We formally introduce two kinds of activity composition mechanisms: {\em activity specialization\/} and activity {\em aggregation\/} for abstract implementation of communication behavior. The former is suited for behavioral refinement of existing activities into specialized activities. The latter is used for behavioral composition of simpler activities into complex activities, and ultimately, into the envisaged database system. We use first-order temporal logic as an underlying formalism for specification of communication constraints. The well known Air-traffic-control case is used as a running example to highlight the underlying concepts, to illustrate the usefulness, and to assess the effectiveness of the activity model for declarative specification of communication behavior in the relevant universe of discourse. We also propose a methodological framework for integrating activity schema with entity schema in an object-oriented design environment.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "activity aggregation; activity patterns; activity specialization; communication behavior; design; first-order temporal logic; languages; object-oriented databases; synchronization schemes; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design. {\bf D.3.1}: Software, PROGRAMMING LANGUAGES, Formal Definitions and Theory, Semantics. 
{\bf F.3.1}: Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs, Specification techniques. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems. {\bf C.2.4}: Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases.", } @Article{Ross:1996:TRE, author = "Kenneth A. Ross", title = "Tail Recursion Elimination in Deductive Databases", journal = j-TODS, volume = "21", number = "2", pages = "208--237", month = jun, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-2/p208-ross/p208-ross.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-2/p208-ross/; http://www.acm.org/pubs/toc/Abstracts/tods/232628.html", abstract = "We consider an optimization technique for deductive and relational databases. The optimization technique is an extension of the magic templates rewriting, and it can improve the performance of query evaluation by not materializing the extension of intermediate views. Standard relational techniques, such as unfolding embedded view definitions, do not apply to recursively defined views, and so alternative techniques are necessary. We demonstrate the correctness of our rewriting. We define a class of ``nonrepeating'' view definitions, and show that for certain queries our rewriting performs at least as well as magic templates on nonrepeating views, and often much better. A syntactically recognizable property, called ``weak right-linearity'', is proposed. Weak right-linearity is a sufficient condition for nonrepetition and is more general than right-linearity. 
Our technique gives the same benefits as right-linear evaluation of right-linear views, while applying to a significantly more general class of views.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; deductive databases; magic sets; query optimization; tail recursion", subject = "{\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Logic programming.", } @Article{Bell:1996:IDD, author = "Colin Bell and Anil Nerode and Raymond T. Ng and V. S. Subrahmanian", title = "Implementing Deductive Databases by Mixed Integer Programming", journal = j-TODS, volume = "21", number = "2", pages = "238--269", month = jun, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-2/p238-bell/p238-bell.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-2/p238-bell/; http://www.acm.org/pubs/toc/Abstracts/tods/232691.html", abstract = "Existing and past generations of Prolog compilers have left deduction to run-time and this may account for the poor run-time performance of existing Prolog systems. Our work tries to minimize run-time deduction by shifting the deductive process to compile-time. In addition, we offer an alternative inferencing procedure based on translating logic to mixed integer programming. 
This makes available for research and implementation in deductive databases, all the theorems, algorithms, and software packages developed by the operations research community over the past 50 years. The method keeps the same query language as for disjunctive deductive databases, only the inferencing procedure changes. The language is purely declarative, independent of the order of rules in the program, and independent of the order in which literals occur in clause bodies. The technique avoids Prolog's problem of infinite looping. It saves run-time by doing primary inferencing at compile-time. Furthermore, it is incremental in nature. The first half of this article translates disjunctive clauses, integrity constraints, and database facts into Boolean equations, and develops procedures to use mixed integer programming methods to compute\par ---least models of definite deductive databases, and\par ---minimal models and the Generalized Closed World Assumption of disjunctive databases.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; minimal models; negation and disjunction in deductive databases; theory", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages. {\bf F.4.1}: Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Mathematical Logic. {\bf I.2.4}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods.", } @Article{Guo:1996:SSI, author = "Sha Guo and Wei Sun and Mark A.
Weiss", title = "Solving Satisfiability and Implication Problems in Database Systems", journal = j-TODS, volume = "21", number = "2", pages = "270--293", month = jun, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-2/p270-guo/p270-guo.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-2/p270-guo/; http://www.acm.org/pubs/toc/Abstracts/tods/232692.html", abstract = "Satisfiability, implication, and equivalence problems involving conjunctive inequalities are important and widely encountered database problems that need to be efficiently and effectively processed. In this article we consider two popular types of arithmetic inequalities, ({\em X{\bf op\/}Y}) and ({\em X {\bf op\/} C}), where $X$ and $Y$ are attributes, $C$ is a constant of the domain of $X$, and {\bf op} $\in$ {$<$, $ < = $, $=$, $\neq$, $>$, $ > = $}. These inequalities are most frequently used in a database system, inasmuch as the former type of inequality represents a 0-join, and the latter is a selection. We study the satisfiability and implication problems under the integer domain and the real domain, as well as under two different operator sets ({$<$, $ < = $, =, $ > = $, $>$} and {$<$, $ < = $, =, $\neq$, $ > = $, $>$}). Our results show that solutions under different domains and/or different operator sets are quite different. Out of these eight cases, excluding two cases that had been shown to be NP-hard, we either report the first necessary and sufficient conditions for these problems as well as their efficient algorithms with complexity analysis (for four cases), or provide an improved algorithm (for two cases).
These iff conditions and algorithms are essential to database designers, practitioners, and researchers. These algorithms have been implemented and an experimental study comparing the proposed algorithms and those previously known is conducted. Our experiments show that the proposed algorithms are more efficient than previously known algorithms even for small input. The C++ code can be obtained by an anonymous ftp from \path=archive.fiu.edu=.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; deduction; equivalence; implication; languages; reasoning; satisfiabilty; theory", subject = "{\bf F.2.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems, Complexity of proof procedures. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Deduction. 
{\bf I.1.2}: Computing Methodologies, ALGEBRAIC MANIPULATION, Algorithms, Analysis of algorithms.", } @Article{Ciaccia:1996:DKB, author = "Paolo Ciaccia and Paolo Tiberio and Pavel Zezula", title = "Declustering of Key-Based Partitioned Signature Files", journal = j-TODS, volume = "21", number = "3", pages = "295--338", month = sep, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-3/p295-ciaccia/p295-ciaccia.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-3/p295-ciaccia/; http://www.acm.org/pubs/toc/Abstracts/tods/232755.html", abstract = "Access methods based on signature files can largely benefit from possibilities offered by parallel environments. To this end, an effective {\em declustering\/} strategy that would distribute signatures over a set of parallel independent disks has to be combined with a synergic {\em clustering\/} which is employed to avoid searching the whole signature file while executing a query. This article proposes two parallel signature file organizations, Hamming Filter ({\em HF\/}) and Hamming$^+$ Filter ($ H^+F $ ), whose common declustering strategy is based on {\em error correcting codes}, and where clustering is achieved by organizing signatures into fixed-size buckets, each containing signatures sharing the same {\em key\/} value. {\em HF\/} allocates signatures on disks in a static way and works well if a correct relationship holds between the parameters of the code and the size of the file. $ H^+F $ is a generalization of $ H F $ suitable to manage highly dynamic files. 
It uses a dynamic declustering, obtained through a {\em sequence\/} of codes, and organizes a smooth migration of signatures between disks so that high performance levels are retained regardless of current file size. Theoretical analysis characterizes the best-case, expected, and worst-case behaviors of these organizations. Analytical results are verified by experiments on prototype systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "design; error correcting codes; information retrieval; parallel independent disks; partial match queries; performance; performance evaluation; superimposed coding", subject = "{\bf E.4}: Data, CODING AND INFORMATION THEORY. {\bf E.5}: Data, FILES, Organization/structure. {\bf H.2.2}: Information Systems, DATABASE MANAGEMENT, Physical Design, Access methods. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. 
{\bf H.3.2}: Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Storage, File organization.", } @Article{Dey:1996:PRM, author = "Debabrata Dey and Sumit Sarkar", title = "A Probabilistic Relational Model and Algebra", journal = j-TODS, volume = "21", number = "3", pages = "339--369", month = sep, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-3/p339-dey/p339-dey.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-3/p339-dey/; http://www.acm.org/pubs/toc/Abstracts/tods/232796.html", abstract = "Although the relational model for databases provides a great range of advantages over other data models, it lacks a comprehensive way to handle incomplete and uncertain data. Uncertainty in data values, however, is pervasive in all real-world environments and has received much attention in the literature. Several methods have been proposed for incorporating uncertain data into relational databases. However, the current approaches have many shortcomings and have not established an acceptable extension of the relational model. In this paper, we propose a consistent extension of the relational model. We present a revised relational structure and extend the relational algebra. The extended algebra is shown to be closed, a consistent extension of the conventional relational algebra, and reducible to the latter.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data incompleteness; data uncertainty; languages; probabilistic relation; probability calculus; relational algebra; relational model; theory", subject = "{\bf H.2.1}: Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf F.4.3}: Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Formal Languages, Algebraic language theory. {\bf G.3}: Mathematics of Computing, PROBABILITY AND STATISTICS, Statistical computing. {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Data manipulation languages (DML). {\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.8}: Information Systems, DATABASE MANAGEMENT, Database applications. {\bf I.2.3}: Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving, Uncertainty, ``fuzzy,'' and probabilistic reasoning.", } @Article{Ghandeharizadeh:1996:HED, author = "Shahram Ghandeharizadeh and Richard Hull and Dean Jacobs", title = "{Heraclitus}: Elevating Deltas to be First-Class Citizens in a Database Programming Language", journal = j-TODS, volume = "21", number = "3", pages = "370--426", month = sep, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-3/p370-ghandeharizadeh/p370-ghandeharizadeh.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-3/p370-ghandeharizadeh/; http://www.acm.org/pubs/toc/Abstracts/tods/232801.html", abstract = "Traditional database systems provide a user with the ability to query and manipulate one database state, namely the 
current database state. However, in several emerging applications, the ability to analyze ``what-if'' scenarios in order to reason about the impact of an update (before committing that update) is of paramount importance. Example applications include hypothetical database access, active database management systems, and version management, to name a few. The central thesis of the Heraclitus paradigm is to provide flexible support for applications such as these by elevating {\em deltas}, which represent updates proposed against the current database state, to be first-class citizens. Heraclitus[Alg,C] is a database programming language that extends C to incorporate the relational algebra and deltas. Operators are provided that enable the programmer to explicitly construct, combine, and access deltas. Most interesting is the when operator, that supports hypothetical access to a delta: the expression {\bf E} when [sigma] yields the value that side effect free expression E would have if the value of delta expression [sigma] were applied to the current database state. This article presents a broad overview of the philosophy underlying the Heraclitus paradigm, and describes the design and prototype implementation of Heraclitus[Alg, C]. A model-independent formalism for the Heraclitus paradigm is also presented. To illustrate the utility of Heraclitus, the article presents an in-depth discussion of how Heraclitus[Alg, C] can be used to specify, and thereby implement, a wide range of execution models for rule application in active databases; this includes both prominent execution models presented in the literature, and more recent ``customized'' execution models with novel features.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "active databases; deltas; design; execution model for rule application; hypothetical access; hypothetical database state; languages", subject = "{\bf H.2.3}: Information Systems, DATABASE MANAGEMENT, Languages, Database (persistent) programming languages. {\bf D.3.3}: Software, PROGRAMMING LANGUAGES, Language Constructs and Features. {\bf H.2.4}: Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf H.2.m}: Information Systems, DATABASE MANAGEMENT, Miscellaneous.", } @Article{Kuo:1996:MVD, author = "Dean Kuo", title = "Model and verification of a data manager based on {ARIES}", journal = j-TODS, volume = "21", number = "4", pages = "427--479", month = dec, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-4/p427-kuo/p427-kuo.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-4/p427-kuo/", abstract = "In this article, we model and verify a data manager whose algorithm is based on ARIES. The work uses the I/O automata method as the formal model and the definition of correctness is defined on the interface between the scheduler and the data manager.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Reliability; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "ARIES; I/O automata; reliability; system failures; theory; verification", subject = "{\bf F.3.1} Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs, Assertions. {\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Litwin:1996:LSD, author = "Witold Litwin and Marie-Anna Neimat and Donovan A. Schneider", title = "{LH*} --- a scalable, distributed data structure", journal = j-TODS, volume = "21", number = "4", pages = "480--525", month = dec, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-4/p480-litwin/p480-litwin.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-4/p480-litwin/", abstract = "We present a scalable distributed data structure called LH*. LH* generalizes Linear Hashing (LH) to distributed RAM and disk files. An LH* file can be created from records with primary keys, or objects with OIDs, provided by any number of distributed and autonomous clients. It does not require a central directory, and grows gracefully, through splits of one bucket at a time, to virtually any number of servers. The number of messages per random insertion is one in general, and three in the worst case, regardless of the file size. The number of messages per key search is two in general, and four in the worst case. The file supports parallel operations, e.g., hash joins and scans. 
Performing a parallel operation on a file of $M$ buckets costs at most 2 $M$ + 1 messages, and between 1 and $O$ (log 2 $M$ ) rounds of messages.\par We first describe the basic LH* scheme where a coordinator site manages abucket splits, and splits a bucket every time a collision occurs. We show that the average load factor of an LH* file is 65\%--70\% regardless of file size, and bucket capacity. We then enhance the scheme with load control, performed at no additional message cost. The average load factor then increases to 80--95\%. These values are about that of LH, but the load factor for LH* varies more.\par We nest define LH* schemes without a coordinator. We show that insert and search costs are the same as for the basic scheme. The splitting cost decreases on the average, but becomes more variable, as cascading splits are needed to prevent file overload. Next, we briefly describe two variants of splitting policy, using parallel splits and presplitting that should enhance performance for high-performance applications.\par All together, we show that LH* files can efficiently scale to files that are orders of magnitude larger in size than single-site files. LH* files that reside in main memory may also be much faster than single-site disk files. Finally, LH* files can be more efficient than any distributed file with a centralized directory, or a static parallel or distributed hash file.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; data structures; design; distributed access methods; extensible hashing; linear hashing; performance", subject = "{\bf E.2} Data, DATA STORAGE REPRESENTATIONS, Hash-table representations. {\bf E.1} Data, DATA STRUCTURES. 
{\bf H.2.1} Information Systems, DATABASE MANAGEMENT, Logical Design", } @Article{Raschid:1996:SUR, author = "Louiqa Raschid and Jorge Lobo", title = "Semantics for update rule programs and implementation in a relational database management system", journal = j-TODS, volume = "21", number = "4", pages = "526--572", month = dec, year = "1996", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1996-21-4/p526-raschid/p526-raschid.pdf; http://www.acm.org/pubs/citations/journals/tods/1996-21-4/p526-raschid/", abstract = "In this paper, we present our research on defining a correct semantics for a class of update rule (UR) programs, and discuss implementing these programs in a DBMS environment. Update rules execute by updating relations in a database which may cause the further execution of rules. A correct semantics must guarantee that the execution of the rules will terminate and that it will produce a minimal updated database. The class of UR programs is syntactically identified, based upon a concept that is similar to stratification. We extend that strict definition of stratification and allow a relaxed criterion for partitioning of the rules in the UR program. This relaxation allows a limited degree of nondeterminism in rule execution. We define an execution semantics based upon a monotonic fixpoint operator TUR, resulting in a set of fixpoints for UR. The monotonicity of the operator is maintained by explicitly representing the effect of asserting and retracting tuples in the database. A declarative semantics for the update rule program is obtained by associating a normal logic program UR to represent the UR program. 
We use the stable model semantics which characterize a normal logic program by a set of minimal models which are called stable models. We show the equivalence between the set of fixpoints for UR and the set of stable models for UR. We briefly discuss implementing the fixpoint semantics of the UR program in a DBMS environment. Relations that can be updated by the rules are {\em updatable\/} relations and they are extended with two flags. An update rule is represented by a database query, which queries the updatable relations as well as database relations, i.e., those relations which are not update by rules. We describe an algorithm to process the queries and compute a fixpoint in the DBMS environment and obtain a final database.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Experimentation; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "constant maintenance; deductive database; fixpoint semantics; rule-based systems; stable model semantics; update rules", subject = "{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems. {\bf F.4.1} Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Mathematical Logic, Model theory. {\bf H.2.3} Information Systems, DATABASE MANAGEMENT, Languages, Data description languages (DDL). {\bf I.2.4} Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods, Predicate logic. {\bf I.2.4} Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods, Representations (procedural and rule-based). {\bf H.2.7} Information Systems, DATABASE MANAGEMENT, Database Administration", } @Article{Keen:1997:EEL, author = "John S. Keen and William J. 
Dally", title = "Extended ephemeral logging: log storage management for applications with long lived transactions", journal = j-TODS, volume = "22", number = "1", pages = "1--42", month = mar, year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1997-22-1/p1-keen/p1-keen.pdf; http://www.acm.org/pubs/citations/journals/tods/1997-22-1/p1-keen/", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; disk management; logging; long transactions; OLTP; performance; reliability", subject = "{\bf H.2.7} Information Systems, DATABASE MANAGEMENT, Database Administration, Logging and recovery. {\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design, Recovery and restart. 
{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing", } @Article{Galindo-Legaria:1997:OSR, author = "C{\'e}sar Galindo-Legaria and Arnon Rosenthal", title = "Outerjoin simplification and reordering for query optimization", journal = j-TODS, volume = "22", number = "1", pages = "43--74", month = mar, year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1997-22-1/p43-galindo-legaria/p43-galindo-legaria.pdf; http://www.acm.org/pubs/citations/journals/tods/1997-22-1/p43-galindo-legaria/", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; outerjoins; query optimization; query reordering; theory; verification", subject = "{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Query processing. {\bf G.2.2} Mathematics of Computing, DISCRETE MATHEMATICS, Graph Theory, Graph algorithms", } @Article{Peters:1997:AMD, author = "Randel J. Peters and M. 
Tamer {\"O}zsu", title = "An axiomatic model of dynamic schema evolution in objectbase systems", journal = j-TODS, volume = "22", number = "1", pages = "75--114", month = mar, year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1997-22-1/p75-peters/p75-peters.pdf; http://www.acm.org/pubs/citations/journals/tods/1997-22-1/p75-peters/", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Management; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; dynamic schema evolution; management; object database management systems; theory", subject = "{\bf H.2.1} Information Systems, DATABASE MANAGEMENT, Logical Design. {\bf H.2.1} Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema", } @Article{Wang:1997:LDT, author = "X. 
Sean Wang and Claudio Bettini and Alexander Brodsky and Sushil Jajodia", title = "Logical design for temporal databases with multiple granularities", journal = j-TODS, volume = "22", number = "2", pages = "115--170", month = jun, year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1997-22-2/p115-wang/p115-wang.pdf; http://www.acm.org/pubs/citations/journals/tods/1997-22-2/p115-wang/", abstract = "The purpose of good database logical design is to eliminate data redundancy and insertion and deletion anomalies. In order to achieve this objective for temporal databases, the notions of {\em temporal types}, which formalize time granularities, and {\em temporal functional dependencies\/} (TFDs) are introduced. A temporal type is a monotonic mapping from ticks of time (represented by positive integers) to time sets (represented by subsets of reals) and is used to capture various standard and user-defined calendars. A TFD is a proper extension of the traditional functional dependency and takes the form $ X**Y $ meaning that there is a unique value for $Y$ during one tick of the temporal type [mu] for one particular $X$ value. An axiomatization for TFDs is given. Because a finite set TFDs usually implies an infinite number of TFDs, we introduce the notion of and give an axiomatization for a {\em finite closure\/} to effectively capture a finite set of implied TFDs that are essential of the logical design. Temporal normalization procedures with respect to TFDs are given. Specifically, temporal Boyce-Codd normal form (TBCNF) that avoids all data redundancies due to TFDs, and temporal third normal form (T3NF) that allows dependency preservation, are defined. 
Both normal forms are proper extensions of their traditional counterparts, BCNF and 3NF. Decomposition algorithms are presented that give lossless TBCNF decompositions and lossless, dependency-preserving, T3NF decompositions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; Boyce-Codd normal form; design; granularity; normalization; temporal databases; temporal modules; temporal relations; theory; third normal form", subject = "{\bf H.2.1} Information Systems, DATABASE MANAGEMENT, Logical Design, Normal forms. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems", } @Article{Clifford:1997:SND, author = "James Clifford and Curtis Dyreson and Tom{\'a}s Isakowitz and Christian S. Jensen and Richard T. Snodgrass", title = "On the semantics of ``now'' in databases", journal = j-TODS, volume = "22", number = "2", pages = "171--214", month = jun, year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1997-22-2/p171-clifford/p171-clifford.pdf; http://www.acm.org/pubs/citations/journals/tods/1997-22-2/p171-clifford/", abstract = "Although ``{\em now\/}'' is expressed in SQL and CURRENT\_TIMESTAMP within queries, this value cannot be stored in the database. However, this notion of an ever-increasing current-time value has been reflected in some temporal data models by inclusion of database-resident variables, such as ``{\em now\/}'' ``{\em until-changed},'' ``**,'' ``@,'' and ``-''. 
Time variables are very desirable, but their use also leads
Although many researchers have investigated the process of decomposing transactions into steps to increase concurrency, such research typically focuses on providing algorithms necessary to implement a decomposition supplied by the database application developer and pays relatively little attention to what constitutes a desirable decomposition or how the developer should obtain one. We focus on the decomposition itself. A decomposition generates proof obligations whose discharge ensures desirable properties with respect to the original collection of transactions. We introduce the notion of semantic histories to formulate and prove the necessary properties, and the notion of successor sets to describe efficiently the correct interleavings of steps. The successor set constraints use information about conflicts between steps so as to take full advantage of conflict serializability at the level of steps. We propose a mechanism based on two-phase locking to generate correct stepwise serializable histories.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "concurrency control; database management systems; theory; transaction processing; verification", subject = "{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf D.2.1} Software, SOFTWARE ENGINEERING, Requirements/Specifications, Methodologies. {\bf F.3.1} Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs, Invariants. {\bf F.3.1} Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs, Pre- and post-conditions. 
{\bf F.3.1} Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs, Specification techniques", } @Article{Wolfson:1997:ADR, author = "Ouri Wolfson and Sushil Jajodia and Yixiu Huang", title = "An adaptive data replication algorithm", journal = j-TODS, volume = "22", number = "2", pages = "255--314", month = jun, year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1997-22-2/p255-wolfson/p255-wolfson.pdf; http://www.acm.org/pubs/citations/journals/tods/1997-22-2/p255-wolfson/", abstract = "This article addresses the performance of distributed database systems. Specifically, we present an algorithm for dynamic replication of an object in distributed systems. The algorithm is adaptive in the sense that it changes the replication scheme of the object (i.e., the set of processors at which the object is replicated) as changes occur in the read-write pattern of the object (i.e., the number of reads and writes issued by each processor). The algorithm continuously moves the replication scheme towards an optimal one. We show that the algorithm can be combined with the concurrency control and recovery mechanisms of ta distributed database management system. The performance of the algorithm is analyzed theoretically and experimentally. On the way we provide a lower bound on the performance of any dynamic replication algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; computer networks; dynamic data allocation; file allocation; performance; replicated data", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed systems. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed applications. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Design studies. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Measurement techniques. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS, Reliability, availability, and serviceability", } @Article{Franklin:1997:TCS, author = "Michael J. Franklin and Michael J. 
Carey and Miron Livny", title = "Transactional client-server cache consistency: alternatives and performance", journal = j-TODS, volume = "22", number = "3", pages = "315--363", month = sep, year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1997-22-3/p315-franklin/p315-franklin.pdf; http://www.acm.org/pubs/citations/journals/tods/1997-22-3/p315-franklin/; http://www.acm.org:80/pubs/citations/journals/tods/1997-22-3/p315-franklin/", abstract = "Client-server database systems based on a data shipping model can exploit client memory resources by caching copies of data items across transaction boundaries. Caching reduces the need to obtain data from servers or other sites on the network. In order to ensure that such caching does not result in the violation of transaction semantics, a transactional cache consistency maintenance algorithm is required. Many such algorithms have been proposed in the literature and, as all provide the same functionality, performance is a primary concern in choosing among them. In this article we present a taxonomy that describes the design space for transactional cache consistency maintenance algorithms and show how proposed algorithms relate to one another. We then investigate the performance of six of these algorithms, and use these results to examine the tradeoffs inherent in the design choices identified in the taxonomy. 
The results show that the interactions among dimensions of the design space impact performance in many ways, and that classifications of algorithms as simply ``pessimistic'' or ``optimistic'' do not accurately characterize the similarities and differences among the many possible cache consistency algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; design; performance", subject = "{\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS. {\bf D.4.8} Software, OPERATING SYSTEMS, Performance. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Concurrency.", } @Article{Eiter:1997:DD, author = "Thomas Eiter and Georg Gottlob and Heikki Mannila", title = "Disjunctive {Datalog}", journal = j-TODS, volume = "22", number = "3", pages = "364--418", month = sep, year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1997-22-3/p364-eiter/p364-eiter.pdf; http://www.acm.org/pubs/citations/journals/tods/1997-22-3/p364-eiter/; http://www.acm.org:80/pubs/citations/journals/tods/1997-22-3/p364-eiter/", abstract = "We consider disjunctive Datalog, a powerful database query language based on disjunctive logic programming. 
Briefly, disjunctive Datalog is a variant of Datalog where disjunctions may appear in the rule heads; advanced versions also allow for negation in the bodies which can be handled according to a semantics for negation in disjunctive logic programming. In particular, we investigate three different semantics for disjunctive Datalog: the minimal model semantics, the perfect model semantics, and the stable model semantics. For each of these semantics, the expressive power and complexity are studied. We show that the possibility variants of these semantics express the same set of queries. In fact, they precisely capture the complexity class {$\Sigma^P_2$}. Thus, unless the Polynomial Hierarchy collapses, disjunctive Datalog is more expressive than normal logic programming with negation. These results are not only of theoretical interest; we demonstrate that problems relevant in practice such as computing the optimal tour value in the Traveling Salesman Problem and eigenvector computations can be handled in disjunctive Datalog, but not Datalog with negation (unless the Polynomial Hierarchy collapses). In addition, we study modularity properties of disjunctive Datalog and investigate syntactic restrictions of the formalisms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "languages; theory", subject = "{\bf H.2.3} Information Systems, DATABASE MANAGEMENT, Languages, Datalog. {\bf H.2.3} Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf D.1.6} Software, PROGRAMMING TECHNIQUES, Logic Programming. {\bf I.2.3} Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving. {\bf I.2.4} Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods.
{\bf F.2.2} Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Nonnumerical Algorithms and Problems. {\bf F.4.1} Theory of Computation, MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Mathematical Logic, Logic and constraint programming.", } @Article{Lakshmanan:1997:PFP, author = "Laks V. S. Lakshmanan and Nicola Leone and Robert Ross and V. S. Subrahmanian", title = "{ProbView}: a flexible probabilistic database system", journal = j-TODS, volume = "22", number = "3", pages = "419--469", month = sep, year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1997-22-3/p419-lakshmanan/p419-lakshmanan.pdf; http://www.acm.org/pubs/citations/journals/tods/1997-22-3/p419-lakshmanan/; http://www.acm.org:80/pubs/citations/journals/tods/1997-22-3/p419-lakshmanan/", abstract = "Probability theory is mathematically the best understood paradigm for modeling and manipulating uncertain information. Probabilities of complex events can be computed from those of basic events on which they depend, using any of a number of strategies. Which strategy is appropriate depends very much on the known interdependencies among the events involved. Previous work on probabilistic databases has assumed a {\em fixed\/} and {\em restrictive\/} combination strategy (e.g., assuming all events are pairwise independent). In this article, we characterize, using postulates, whole classes of strategies for conjunction, disjunction, and negation, meaningful from the viewpoint of probability theory. 
(1) We propose a probabilistic relational data model and a {\em generic\/} probabilistic relational algebra that neatly captures {\em various strategies\/} satisfying the postulates, within a {\em single unified framework.} (2) We show that as long as the chosen strategies can be computed in polynomial time, queries in the positive fragment of the probabilistic relational algebra have essentially the same data complexity as classical relational algebra. (3) We establish various containments and equivalences between algebraic expressions, similar in spirit to those in classical algebra. (4) We develop algorithms for maintaining materialized probabilistic views. (5) Based on these ideas, we have developed a prototype probabilistic database system called ProbView on top of Dbase V.0. We validate our complexity results with experiments and show that rewriting certain types of queries to other equivalent forms often yields substantial savings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; languages; performance; probabilistic databases; theory; view maintenance", subject = "{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems. {\bf H.2.3} Information Systems, DATABASE MANAGEMENT, Languages. {\bf I.2.3} Computing Methodologies, ARTIFICIAL INTELLIGENCE, Deduction and Theorem Proving.", } @Article{Storey:1997:DDC, author = "Veda C. Storey and Roger H. L. Chiang and Debabrata Dey and Robert C.
Goldstein and Shankar Sundaresan", title = "Database design with common sense business reasoning and learning", journal = j-TODS, volume = "22", number = "4", pages = "471--512", month = dec, year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1997-22-4/p471-storey/p471-storey.pdf; http://www.acm.org/pubs/citations/journals/tods/1997-22-4/p471-storey/; http://www.acm.org:80/pubs/citations/journals/tods/1997-22-4/p471-storey/", abstract = "Automated database design systems embody knowledge about the database design process. However, their lack of knowledge about the domains for which databases are being developed significantly limits their usefulness. A methodology for acquiring and using general world knowledge about business for database design has been developed and implemented in a system called the Common Sense Business Reasoner, which acquires facts about application domains and organizes them into a hierarchical, context-dependent knowledge base. This knowledge is used to make intelligent suggestions to a user about the entities, attributes, and relationships to include in a database design. A distance function approach is employed for integrating specific facts, obtained from individual design sessions, into the knowledge base (learning) and for applying the knowledge to subsequent design problems (reasoning).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "common sense business reasoner; common sense learning; common sense reasoning; database design; design; entity-relationship model; theory", subject = "{\bf D.2.1} Software, SOFTWARE ENGINEERING, Requirements/Specifications. {\bf H.2.1} Information Systems, DATABASE MANAGEMENT, Logical Design. {\bf H.2.8} Information Systems, DATABASE MANAGEMENT, Database Applications. {\bf I.2.4} Computing Methodologies, ARTIFICIAL INTELLIGENCE, Knowledge Representation Formalisms and Methods. {\bf I.2.6} Computing Methodologies, ARTIFICIAL INTELLIGENCE, Learning. {\bf I.2.1} Computing Methodologies, ARTIFICIAL INTELLIGENCE, Applications and Expert Systems.", } @Article{Tari:1997:ONF, author = "Zahir Tari and John Stokes and Stefano Spaccapietra", title = "Object normal forms and dependency constraints for object-oriented schemata", journal = j-TODS, volume = "22", number = "4", pages = "513--569", month = dec, year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1997-22-4/p513-tari/p513-tari.pdf; http://www.acm.org/pubs/citations/journals/tods/1997-22-4/p513-tari/", abstract = "We address the development of a normalization theory for object-oriented data models that have common features to support objects. We first provide an extension of functional dependencies to cope with the richer semantics of relationships between objects, called {\em path dependency}, {\em local dependency}, and {\em global dependency\/} constraints. 
Using these dependency constraints, we provide normal forms for object-oriented data models based on the notions of {\em user interpretation\/} (user-specified dependency constraints) and {\em object model}. In contrast to conventional data models in which a normalized object has a unique interpretation, in object-oriented data models, an object may have many multiple interpretations that form the model for that object. An object will then be in a normal form if and only if the user's interpretation is derivable from the model of the object. Our normalization process is by nature iterative, in which objects are restructured until their models reflect the user's interpretation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data model; functional and multivalued dependencies; normal forms; object-oriented paradigm", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Normal forms}; Information Systems --- Database Management --- Systems (H.2.4)", } @Article{Zaharioudakis:1997:AFG, author = "Markos Zaharioudakis and Michael J. Carey and Michael J. 
Franklin", title = "Adaptive, fine-grained sharing in a client-server {OODBMS}: a callback-based approach", journal = j-TODS, volume = "22", number = "4", pages = "570--627", month = dec, year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1997-22-4/p570-zaharioudakis/p570-zaharioudakis.pdf; http://www.acm.org/pubs/citations/journals/tods/1997-22-4/p570-zaharioudakis/", abstract = "For reasons of simplicity and communication efficiency, a number of existing object-oriented database management systems are based on page server architectures; data pages are their minimum unit of transfer and client caching. Despite their efficiency, page servers are often criticized as being too restrictive when it comes to concurrency, as existing systems use pages as the minimum locking unit as well. In this paper we show how to support object-level locking in a page-server context. Several approaches are described, including an adaptive granularity approach that uses page-level locking for most pages but switches to object-level locking when finer-grained sharing is demanded. Each of the approaches is based on extending the idea of callback locking. We study the performance of these approaches, comparing them to both a pure page server and a pure object server. For the range of workload that we have examined, our results indicate that the adaptive page server provides very good performance, usually outperforming the pure page server and the other page-server variants as well. In addition, the adaptive page server is often preferable to the pure object server; our results provide insight into when each approach is likely to perform better.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "cache coherency; cache consistency; client-server database; design; fine-grained sharing; object-oriented databases; performance; performance analysis", subject = "{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Concurrency; {\bf H.3.4} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Systems and Software, Distributed systems; {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing", } @Article{Anonymous:1997:AI, author = "Anonymous", title = "1997 Author Index", journal = j-TODS, volume = "22", number = "4", pages = "628--??", month = "????", year = "1997", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Nov 7 10:36:24 MST 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Dyreson:1998:SVT, author = "Curtis E. Dyreson and Richard T. 
Snodgrass", title = "Supporting valid-time indeterminacy", journal = j-TODS, volume = "23", number = "1", pages = "1--57", month = mar, year = "1998", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1998-23-1/p1-dyreson/p1-dyreson.pdf; http://www.acm.org/pubs/citations/journals/tods/1998-23-1/p1-dyreson/; http://www.acm.org:80/pubs/citations/journals/tods/1998-23-1/p1-dyreson/", abstract = "In {\em valid-time indeterminacy\/} it is known that an event stored in a database did in fact occur, but it is not known exactly {\em when}. In this paper we extend the SQL data model and query language to support valid-time indeterminacy. We represent the occurrence time of an event with a set of possible instants, delimiting when the event might have occurred, and a probability distribution over that set. We also describe query language constructs to retrieve information in the presence of indeterminacy. These constructs enable users to specify their {\em credibility\/} in the underlying data and their {\em plausibility\/} in the relationships among that data. A denotational semantics for SQL's select statement with optional credibility and plausibility constructs is given. We show that this semantics is {\em reliable}, in that it never produces incorrect information, is {\em maximal}, in that if it were extended to be more informative, the results may not be reliable, and {\em reduces\/} to the previous semantics when there is no indeterminacy. Although the extended data model and query language provide needed modeling capabilities, these extensions appear initially to carry a significant execution cost. A contribution of this paper is to demonstrate that our approach is useful and practical. 
An efficient representation of valid-time indeterminacy and efficient query processing algorithms are provided. The cost of support for indeterminacy is empirically measured, and is shown to be modest. Finally, we show that the approach is general, by applying it to the temporal query language constructs being proposed for SQL3.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; incomplete information; indeterminacy; languages; probabilistic information; SQL; temporal database; TSQL2; valid-time database", subject = "{\bf H.2.1} Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. {\bf H.2.3} Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Revesz:1998:SQL, author = "Peter Z. Revesz", title = "Safe query languages for constraint databases", journal = j-TODS, volume = "23", number = "1", pages = "58--99", month = mar, year = "1998", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1998-23-1/p58-revesz/p58-revesz.pdf; http://www.acm.org/pubs/citations/journals/tods/1998-23-1/p58-revesz/; http://www.acm.org:80/pubs/citations/journals/tods/1998-23-1/p58-revesz/", abstract = "In the database framework of Kanellakis et al. [1990] it was argued that constraint query languages should take constraint databases as input and give other constraint databases that use the same type of atomic constraints as output. 
This closed-form requirement has been difficult to realize in constraint query languages that contain the negation symbol. This paper describes a general approach to restricting constraint query languages with negation to safe subsets that contain only programs that are evaluable in closed-form on any valid constraint database input.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; languages; theory; verification", subject = "{\bf H.2.1} Information Systems, DATABASE MANAGEMENT, Logical Design. {\bf H.2.3} Information Systems, DATABASE MANAGEMENT, Languages, Datalog.", } @Article{Stolboushkin:1998:SSD, author = "Alexei P. Stolboushkin and Michael A. Taitslin", title = "Safe stratified datalog with integer order does not have syntax", journal = j-TODS, volume = "23", number = "1", pages = "100--109", month = mar, year = "1998", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1998-23-1/p100-stolboushkin/p100-stolboushkin.pdf; http://www.acm.org/pubs/citations/journals/tods/1998-23-1/p100-stolboushkin/", abstract = "Stratified Datalog with integer (gap)-order (or {\em Datalog***\/}) is considered. A {\em Datalog***\/}-program is said to be safe if its bottom-up processing terminates on all valid inputs. 
We prove that safe {\em Datalog***\/}-programs do not have effective syntax in the sense that there is no recursively enumerable set $S$ of safe {\em Datalog***\/}-programs such that every safe {\em Datalog***\/}-program is equivalent to a program in $S$.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "languages; theory; verification", subject = "{\bf H.2.1} Information Systems, DATABASE MANAGEMENT, Logical Design; {\bf H.2.3} Information Systems, DATABASE MANAGEMENT, Languages.", } @Article{Anonymous:1998:TR, author = "Anonymous", title = "{TODS} Referees", journal = j-TODS, volume = "23", number = "1", pages = "110--111", month = mar, year = "1998", bibdate = "Mon Jan 18 18:22:17 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Anonymous:1998:C, author = "Anonymous", title = "Corrigenda", journal = j-TODS, volume = "23", number = "1", pages = "112--112", month = mar, year = "1998", bibdate = "Mon Jan 18 18:22:17 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Hellerstein:1998:OTQ, author = "Joseph M. 
Hellerstein", title = "Optimization techniques for queries with expensive methods", journal = j-TODS, volume = "23", number = "2", pages = "113--157", month = jun, year = "1998", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1998-23-2/p113-hellerstein/p113-hellerstein.pdf; http://www.acm.org/pubs/citations/journals/tods/1998-23-2/p113-hellerstein/; http://www.acm.org:80/pubs/citations/journals/tods/1998-23-2/p113-hellerstein/", abstract = "Object-relational database management systems allow knowledgeable users to define new data types as well as new methods (operators) for the types. This flexibility produces an attendant complexity, which must be handled in new ways for an object-relational database management system to be efficient. In this article we study techniques for optimizing queries that contain time-consuming methods. The focus of traditional query optimizers has been on the choice of join methods and orders; selections have been handled by ``pushdown'' rules. These rules apply selections in an arbitrary order before as many joins as possible, using th e assumption that selection takes no time. However, users of object-relational systems can embed complex methods in selections. Thus selections may take significant amounts of time, and the query optimization model must be enhanced. In this article we carefully define a query cost framework that incorporates both selectivity and cost estimates for selections. We develop an algorithm called Predicate Migration, and prove that it produces optimal plans for queries with expensive methods. 
We then describe our implementation of Predicate Migration in the commercial object-relational database management system Illustra, and discuss practical issues that affect our earlier assumptions. We compare Predicate Migration to a variety of simpler optimization techniques, and demonstrate that Predicate Migration is the best general solution to date. The alternative techniques we present may be useful for constrained workloads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; expensive methods; extensibility; object-relational databases; performance; predicate migration; predicate placement; query optimization; theory", subject = "{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Query processing.", } @Article{Liu:1998:MAP, author = "Xiangning Liu and Abdelsalam Helal and Weimin Du", title = "Multiview access protocols for large-scale replication", journal = j-TODS, volume = "23", number = "2", pages = "158--198", month = jun, year = "1998", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1998-23-2/p158-liu/p158-liu.pdf; http://www.acm.org/pubs/citations/journals/tods/1998-23-2/p158-liu/; http://www.acm.org:80/pubs/citations/journals/tods/1998-23-2/p158-liu/", abstract = "The article proposes a scalable protocol for replication management in large-scale replicated systems. The protocol organizes sites and data replicas into a tree-structured, hierarchical cluster architecture. 
The basic idea of the protocol is to accomplish the complex task of updating replicated data with a very large number of replicas by a set of related but independently committed transactions. Each transaction is responsible for updating replicas in exactly one cluster and invoking additional transactions for member clusters. Primary copies (one from each cluster) are updated by a cross-cluster transaction. Then each cluster is independently updated by a separate transaction. This decoupled update propagation process results in possible multiple views of replicated data in a cluster. Compared to other replicated data management protocols, the proposed protocol has several unique advantages. First, thanks to a smaller number of replicas each transaction needs to atomically update in a cluster, the protocol significantly reduces the transaction abort rate, which tends to soar in large transactional systems. Second, the protocol improves user-level transaction response time as top-level update transactions are allowed to commit before all replicas have been updated. Third, read-only queries have the flexibility to see database views of different degrees of consistency and data currency. This ranges from global, most up to date, and consistent views, to local, consistent, but potentially old views, to local, nearest to users but potentially inconsistent views. Fourth, the protocol maintains its scalability by allowing dynamic system reconfiguration as it grows by splitting a cluster into two or more smaller ones. Fifth, autonomy of the clusters is preserved as no specific protocol is required to update replicas within the same cluster. Clusters are, therefore, free to use any valid replication or concurrency control protocols.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Experimentation; Management; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; data replication; design; experimentation; large-scale systems; management; measurement; multiview access; performance", subject = "{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed databases. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing. {\bf C.2.4} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Distributed Systems, Distributed databases.", } @Article{Mehrotra:1998:ECM, author = "Sharad Mehrotra and Rajeev Rastogi and Henry F. Korth and Abraham Silberschatz", title = "Ensuring consistency in multidatabases by preserving two-level serializability", journal = j-TODS, volume = "23", number = "2", pages = "199--230", month = jun, year = "1998", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1998-23-2/p199-mehrotra/p199-mehrotra.pdf; http://www.acm.org/pubs/citations/journals/tods/1998-23-2/p199-mehrotra/; http://www.acm.org:80/pubs/citations/journals/tods/1998-23-2/p199-mehrotra/", abstract = "The concept of serializability has been the traditionally accepted correctness criterion in database systems. However in multidatabase systems (MDBSs), ensuring global serializability is a difficult task. The difficulty arises due to the {\em heterogeneity\/} of the concurrency control protocols used by the participating local database management systems (DBMSs), and the desire to preserve the {\em autonomy\/} of the local DBMSs. 
In general, solutions to the global serializability problem result in executions with a low degree of concurrency. The alternative, relaxed serializability, may result in data inconsistency.\par In this article, we introduce a systematic approach to relaxing the serializability requirement in MDBS environments. Our approach exploits the structure of the integrity constraints and the nature of transaction programs to ensure consistency without requiring executions to be serializable. We develop a simple yet powerful classification of MDBSs based on the nature of integrity constraints and transaction programs. For each of the identified models we show how consistency can be preserved by ensuring that executions are {\em two-level serializable\/} (2LSR). 2LSR is a correctness criterion for MDBS environments weaker than serializability. What makes our approach interesting is that unlike global serializability, ensuring 2LSR in MDBS environments is relatively simple and protocols to ensure 2LSR permit a high degree of concurrency. Furthermore, we believe the range of models we consider cover many practical MDBS environments to which the results of this article can be applied to preserve database consistency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Management; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "beyond serializability; concurrency control; database consistency; management; multidatabases; theory", subject = "{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Concurrency. {\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Distributed databases. 
{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Transaction processing.", } @Article{Bertino:1998:ACM, author = "Elisa Bertino and Claudio Bettini and Elena Ferrari and Pierangela Samarati", title = "An access control model supporting periodicity constraints and temporal reasoning", journal = j-TODS, volume = "23", number = "3", pages = "231--285", month = sep, year = "1998", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1998-23-3/p231-bertino/; http://www.acm.org:80/pubs/citations/journals/tods/1998-23-3/p231-bertino/", abstract = "Access control models, such as the ones supported by commercial DBMSs, are not yet able to fully meet many application needs. An important requirement derives from the temporal dimension that permissions have in many real-world situations. Permissions are often limited in time or may hold only for specific periods of time. In this article, we present an access control model in which periodic temporal intervals are associated with authorizations. An authorization is automatically granted in the specified intervals and revoked when such intervals expire. Deductive temporal rules with periodicity and order constraints are provided to derive new authorizations based on the presence or absence of other authorizations in specific periods of time. We provide a solution to the problem of ensuring the uniqueness of the global set of valid authorizations derivable at each instant, and we propose an algorithm to compute this set. Moreover, we address issues related to the efficiency of access control by adopting a materialization approach. 
The resulting model provides a high degree of flexibility and supports the specification of several protection requirements that cannot be expressed in traditional access control models.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Security", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "access control; periodic authorization; security; temporal constraints; time management", subject = "{\bf H.2.7} Information Systems, DATABASE MANAGEMENT, Database Administration, Security, integrity, and protection.", } @Article{Castano:1998:CSA, author = "S. Castano and V. {De Antonellis} and M. G. Fugini and B. Pernici", title = "Conceptual schema analysis: techniques and applications", journal = j-TODS, volume = "23", number = "3", pages = "286--333", month = sep, year = "1998", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1998-23-3/p286-castano/p286-castano.pdf; http://www.acm.org/pubs/citations/journals/tods/1998-23-3/p286-castano/; http://www.acm.org:80/pubs/citations/journals/tods/1998-23-3/p286-castano/", abstract = "The problem of analyzing and classifying conceptual schemas is becoming increasingly important due to the availability of a large number of schemas related to existing applications. 
The purposes of schema analysis and classification activities can be different: to extract information on intensional properties of legacy systems in order to restructure or migrate to new architectures; to build libraries of reference conceptual components to be used in building new applications in a given domain; and to identify information flows and possible replication of data in an organization. This article proposes a set of techniques for schema analysis and classification to be used separately or in combination. The techniques allow the analyst to derive significant properties from schemas, with human intervention limited as far as possible. In particular, techniques for associating descriptors with schemas, for abstracting reference conceptual schemas based on schema clustering, and for determining schema similarity are presented. A methodology for systematic schema analysis is illustrated, with the purpose of identifying and abstracting into reference components the similar and potentially reusable parts of a set of schemas. Experiences deriving from the application of the proposed techniques and methodology on a large set of Entity-Relationship conceptual schemas of information systems in the Italian Public Administration domain are described", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Documentation; Management", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "conceptual modeling; design; documentation; management; reference components; schema classification; schema similarity", subject = "{\bf H.2.1} Information Systems, DATABASE MANAGEMENT, Logical Design. {\bf H.3.1} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Content Analysis and Indexing.", } @Article{Formica:1998:EMC, author = "A. Formica and H. D. Groger and M. 
Missikoff", title = "An efficient method for checking object-oriented database schema correctness", journal = j-TODS, volume = "23", number = "3", pages = "334--369", month = sep, year = "1998", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1998-23-3/p334-formica/p334-formica.pdf; http://www.acm.org/pubs/citations/journals/tods/1998-23-3/p334-formica/; http://www.acm.org:80/pubs/citations/journals/tods/1998-23-3/p334-formica/", abstract = "Inheritance is introduced in object-oriented systems to enhance code reuse and create more compact and readable software. Powerful object models adopt multiple inheritance, allowing a type (or class) definition to inherit from more than one supertype. Unfortunately, in applying this powerful modeling mechanism, inheritance conflicts may be generated, which arise when the same property or operation is defined in more than one supertype. Inheritance conflicts identification and resolution is the key issue of this article. In strongly typed object-oriented systems the resolution of inheritance conflicts depends on the compatibility of the types of the conflicting definitions. In case of incompatible types, a contradiction arises. This article focuses on object-oriented databases (ODBs), providing a method aimed at supporting the designer in the construction of correct ODB schemas. The first necessary condition for schema correctness is the absence of contradictions. A second cause of schema incorrectness is due to the presence of structurally recursive types that, when defined within certain hierarchical patterns, cause the nontermination of the inheritance process. 
In the article, after the formal definition of a correct schema, two graph-theoretic methods aimed at verifying ODB schema correctness are analyzed. Although the first method is intuitive but inefficient, the second allows schema correctness to be checked in polynomial time, in the size of the schema. The results of this study are included in the implementation of Mosaico, an environment for ODB application design.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "databases; design; graph theory; inheritance conflicts; inheritance process; languages; object-oriented database schemas; recursive types; theory; verification", subject = "{\bf H.2.4} Information Systems, DATABASE MANAGEMENT, Systems, Object-oriented databases. {\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language Constructs and Features, Data types and structures. {\bf F.3.1} Theory of Computation, LOGICS AND MEANINGS OF PROGRAMS, Specifying and Verifying and Reasoning about Programs, Mechanical verification. {\bf H.2.3} Information Systems, DATABASE MANAGEMENT, Languages, Data description languages (DDL). {\bf H.2.1} Information Systems, DATABASE MANAGEMENT, Logical Design, Data models. 
{\bf H.2.1} Information Systems, DATABASE MANAGEMENT, Logical Design, Schema and subschema.", } @Article{Konopnicki:1998:IGW, author = "David Konopnicki and Oded Shmueli", title = "Information gathering in the {World-Wide Web}: the {W3QL} query language and the {W3QS} system", journal = j-TODS, volume = "23", number = "4", pages = "369--410", month = dec, year = "1998", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1998-23-4/p369-konopnicki/p369-konopnicki.pdf; http://www.acm.org/pubs/citations/journals/tods/1998-23-4/p369-konopnicki/; http://www.acm.org:80/pubs/citations/journals/tods/1998-23-4/p369-konopnicki/", abstract = "The World Wide Web (WWW) is a fast growing global information resource. It contains an enormous amount of information and provides access to a variety of services. Since there is no central control and very few standards of information organization or service offering, searching for information and services is a widely recognized problem. To some degree this problem is solved by ``search services,'' also known as ``indexers,'' such as Lycos, AltaVista, Yahoo, and others. These sites employ search engines known as ``robots'' or ``knowbots'' that scan the network periodically and form text-based indices. These services are limited in certain important aspects. First, the structural information, namely, the organization of the document into parts pointing to each other, is usually lost. Second, one is limited by the kind of textual analysis provided by the ``search service.'' Third, search services are incapable of navigating ``through'' forms. Finally, one cannot prescribe a complex database-like search. We view the WWW as a huge database. 
We have designed a high-level SQL-like language called W3QL to support effective and flexible query processing, which addresses the structure and content of WWW nodes and their varied sorts of data. We have implemented a system called W3QS to execute W3QL queries. In W3QS, query results are declaratively specified and continuously maintained as views when desired. The current architecture of W3QS provides a server that enables users to pose queries as well as integrate their own data analysis tools. The system and its query language set a framework for the development of database-like tools over the WWW. A significant contribution of this article is in formalizing the WWW and query processing over it.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "CGI; design; FORMS; HTML; HTTP; languages; PERL; query language; query system; World-Wide Web", subject = "{\bf H.2.3} Information Systems, DATABASE MANAGEMENT, Languages, Query languages. {\bf H.1.0} Information Systems, MODELS AND PRINCIPLES, General. {\bf H.3.3} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval.", } @Article{Sistla:1998:TTC, author = "A. 
Prasad Sistla and Ouri Wolfson and Yelena Yesha and Robert Sloan", title = "Towards a theory of cost management for digital libraries and electronic commerce", journal = j-TODS, volume = "23", number = "4", pages = "411--452", month = dec, year = "1998", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1998-23-4/p411-sistla/p411-sistla.pdf; http://www.acm.org/pubs/citations/journals/tods/1998-23-4/p411-sistla/; http://www.acm.org:80/pubs/citations/journals/tods/1998-23-4/p411-sistla/", abstract = "One of the features that distinguishes digital libraries from traditional databases is new cost models for client access to intellectual property. Clients will pay for accessing data items in digital libraries, and we believe that optimizing these costs will be as important as optimizing performance in traditional databases. In this article we discuss cost models and protocols for accessing digital libraries, with the objective of determining the minimum cost protocol for each model. We expect that in the future information appliances will come equipped with a cost optimizer, in the same way that computers today come with a built-in operating system. This article makes the initial steps towards a theory and practice of intellectual property cost management.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Economics; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; average case analysis; caching; cost models; demand; economics; on-line services; performance; protocols; subscription; theory; worst case analysis", subject = "{\bf H.2.m} Information Systems, DATABASE MANAGEMENT, Miscellaneous. {\bf H.3.5} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Online Information Services, Commercial services. {\bf H.3.5} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Online Information Services, Web-based services. {\bf H.3.7} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Digital Libraries, Dissemination.", } @Article{Zobel:1998:IFV, author = "Justin Zobel and Alistair Moffat and Kotagiri Ramamohanarao", title = "Inverted files versus signature files for text indexing", journal = j-TODS, volume = "23", number = "4", pages = "453--490", month = dec, year = "1998", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1998-23-4/p453-zobel/p453-zobel.pdf; http://www.acm.org/pubs/citations/journals/tods/1998-23-4/p453-zobel/; http://www.acm.org:80/pubs/citations/journals/tods/1998-23-4/p453-zobel/", abstract = "Two well-known indexing methods are inverted files and signature files. We have undertaken a detailed comparison of these two approaches in the context of text indexing, paying particular attention to query evaluation speed and space requirements. 
We have examined their relative performance using both experimentation and a refined approach to modeling of signature files, and demonstrate that inverted files are distinctly superior to signature files. Not only can inverted files be used to evaluate typical queries in less time than can signature files, but inverted files require less space and provide greater functionality. Our results also show that a synthetic text database can provide a realistic indication of the behavior of an actual text database. The tools used to generate the synthetic database have been made publicly available", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "algorithms; indexing; inverted files; performance; signature files; text databases; text indexing", subject = "{\bf E.5} Data, FILES. {\bf H.2.2} Information Systems, DATABASE MANAGEMENT, Physical Design. {\bf H.3.3} Information Systems, INFORMATION STORAGE AND RETRIEVAL, Information Search and Retrieval. {\bf I.7.3} Computing Methodologies, DOCUMENT AND TEXT PROCESSING, Index Generation**.", } @Article{Datta:1999:BPS, author = "Anindya Datta and Debra E. Vandermeer and Aslihan Celik and Vijay Kumar", title = "Broadcast protocols to support efficient retrieval from databases by mobile users", journal = j-TODS, volume = "24", number = "1", pages = "1--79", month = mar, year = "1999", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 26 08:44:02 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1999-24-1/p1-datta/", abstract = "Mobile computing has the potential for managing information globally. 
Data management issues in mobile computing have received some attention in recent times, and the design of {\em adaptive broadcast protocols\/} has been posed as an important problem. Such protocols are employed by database servers to decide on the content of broadcasts dynamically, in response to client mobility and demand patterns. In this paper we design such protocols and also propose efficient retrieval strategies that may be employed by clients to download information from broadcasts. The goal is to design {\em cooperative\/} strategies between server and client to provide access to information in such a way as to minimize energy expenditure by clients. We evaluate the performance of our protocols both analytically and through simulation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "adaptive broadcast protocols; client-server computing; energy conservation; mobile databases", subject = "Computer Systems Organization --- Computer-Communication Networks --- Network Architecture and Design (C.2.1); Information Systems --- Database Management (H.2); Computer Systems Organization --- Computer-Communication Networks --- Network Protocols (C.2.2); Information Systems --- Database Management --- Systems (H.2.4); Computer Systems Organization --- Computer-Communication Networks --- Network Architecture and Design (C.2.1): {\bf Wireless communication}; Information Systems --- Information Systems Applications --- Communications Applications (H.4.3); Information Systems --- Information Systems Applications --- Communications Applications (H.4.3): {\bf Internet}", } @Article{Levene:1999:DDI, author = "Mark Levene and George Loizou", title = "Database design for incomplete relations", journal = j-TODS, volume = "24", number = "1", pages = "80--126", month = mar, year = "1999", CODEN = 
"ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 26 08:44:02 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1999-24-1/p80-levene/", abstract = "Although there has been a vast amount of research in the area of relational database design, to our knowledge, there has been very little work that considers whether this theory is still valid when relations in the database may be incomplete. When relations are incomplete and thus contain null values the problem of whether satisfaction is additive arises. Additivity is the property of the equivalence of the satisfaction of a set of functional dependencies (FDs) F with the individual satisfaction of each member of F in an incomplete relation. It is well known that in general, satisfaction of FDs is not additive. Previously we have shown that satisfaction is additive if and only if the set of FDs is monodependent. We conclude that monodependence is a fundamental desirable property of a set of FDs when considering incomplete information in relational database design. We show that, when the set of FDs F either satisfies the intersection property or the split-freeness property, then the problem of finding an optimum cover of F can be solved in polynomial time in the size of F; in general, this problem is known to be NP-complete. We also show that when F satisfies the split-freeness property then deciding whether there is a superkey of cardinality k or less can be solved in polynomial time in the size of F, since all the keys have the same cardinality. If F only satisfies the intersection property then this problem is NP-complete, as in the general case. 
Moreover, we show that when F either satisfies the intersection property or the split-freeness property then deciding whether an attribute is prime can be solved in polynomial time in the size of F; in general, this problem is known to be NP-complete. Assume that a relation schema R is in an appropriate normal form with respect to a set of FDs F. We show that when F satisfies the intersection property then the notions of second normal form and third normal form are equivalent. We also show that when R is in Boyce-Codd Normal Form (BCNF), then F is monodependent if and only if either there is a unique key for R, or for all keys X for R, the cardinality of X is one less than the number of attributes associated with R. Finally, we tackle a long-standing problem in relational database theory by showing that when a set of FDs F over R satisfies the intersection property, it also satisfies the split-freeness property (i.e., is monodependent), if and only if every lossless join decomposition of R with respect to F is also dependency preserving. As a corollary of this result we are able to show that when F satisfies the intersection property, it also satisfies the split-freeness property (i.e., is monodependent), if and only if every lossless join decomposition of R, which is in BCNF, is also dependency preserving. Our final result is that when F is monodependent, then there exists a unique optimum lossless join decomposition of R, which is in BCNF, and is also dependency preserving. Furthermore, this ultimate decomposition can be attained in polynomial time in the size of F.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "additivity problem; complexity; dependency preserving decomposition; incomplete information; intersection property; lossless join decomposition; monodependence; normal forms; null functional dependencies; optimum cover; prime attribute problem; split-freeness property; superkey of cardinality k problem", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Normal forms}", } @Article{Wijsen:1999:TFC, author = "Jef Wijsen", title = "Temporal {FDs} on complex objects", journal = j-TODS, volume = "24", number = "1", pages = "127--176", month = mar, year = "1999", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 26 08:44:02 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1999-24-1/p127-wijsen/", abstract = "{\em Temporal functional dependencies\/} (TFD) are defined for temporal databases that include object identity. It is argued that object identity can overcome certain semantic difficulties with existing temporal relational data models. Practical applications of TFDs in object bases are discussed. Reasoning about TFDs is at the center of this paper. It turns out that the distinction between acyclic and cyclic schemas is significant. For acyclic schemas, a complete axiomatization for finite implication is given and an algorithm for deciding finite implication provided. The same axiomatization is proven complete for unrestricted implication in unrestricted schemas, which can be cyclic. 
An interesting result is that there are cyclic schemas for which unrestricted and finite implication do not coincide. TFDs relate and extend some earlier work on dependency theory in temporal databases. Throughout this paper, the construct of TFD is compared with the notion of temporal FD introduced by Wang et al. (1997). A comparison with other related work is provided at the end of the article.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database constraints; functional dependency; object-identity; temporal databases; time granularity", subject = "Information Systems --- Database Management --- Logical Design (H.2.1)", } @Article{Chaudhuri:1999:OQU, author = "Surajit Chaudhuri and Kyuseok Shim", title = "Optimization of queries with user-defined predicates", journal = j-TODS, volume = "24", number = "2", pages = "177--228", month = jun, year = "1999", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 26 08:44:02 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1999-24-2/p177-chaudhuri/", abstract = "Relational databases provide the ability to store user-defined functions and predicates which can be invoked in SQL queries. When evaluation of a user-defined predicate is relatively expensive, the traditional method of evaluating predicates as early as possible is no longer a sound heuristic. There are two previous approaches for optimizing such queries. However, neither is able to guarantee the optimal plan over the desired execution space. We present efficient techniques that are able to guarantee the choice of an optimal plan over the desired execution space. 
The {\em optimization algorithm with complete rank-ordering\/} improves upon the naive optimization algorithm by exploiting the nature of the cost formulas for join methods and is polynomial in the number of user-defined predicates (for a given number of relations.) We also propose {\em pruning rules\/} that significantly reduce the cost of searching the execution space for both the naive algorithm as well as for the optimization algorithm with complete rank-ordering, without compromising optimality. We also propose a {\em conservative local heuristic\/} that is simpler and has low optimization overhead. Although it is not always guaranteed to find the optimal plans, it produces close to optimal plans in most cases. We discuss how, depending on application requirements, to determine the algorithm of choice. It should be emphasized that our optimization algorithms handle user-defined selections as well as user-defined join predicates uniformly. We present complexity analysis and experimental comparison of the algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Management; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "dynamic programming; query optimization; user-defined predicates", subject = "Information Systems --- Database Management (H.2); Information Systems --- Database Management --- General (H.2.0); Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Gravano:1999:GTS, author = "Luis Gravano and H{\'e}ctor Garc{\'\i}a-Molina and Anthony Tomasic", title = "{GlOSS}: text-source discovery over the {Internet}", journal = j-TODS, volume = "24", number = "2", pages = "229--264", month = jun, year = "1999", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 26 08:44:02 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1999-24-2/p229-gravano/", abstract = "The dramatic growth of the Internet has created a new problem for users: location of the relevant sources of documents. This article presents a framework for (and experimentally analyzes a solution to) this problem, which we call the {\em text-source discovery problem}. Our approach consists of two phases. First, each text source exports its contents to a centralized service. Second, users present queries to the service, which returns an ordered list of promising text sources. This article describes {\em GlOSS}, Glossary of Servers Server, with two versions: {\em bGlOSS}, which provides a Boolean query retrieval model, and {\em vGlOSS}, which provides a vector-space retrieval model. We also present {\em hGlOSS}, which provides a decentralized version of the system. 
We extensively describe the methodology for measuring the retrieval effectiveness of these systems and provide experimental evidence, based on actual data, that all three systems are highly effective in determining promising text sources for a given query.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "digital libraries; distributed information retrieval; Internet search and retrieval; text databases", subject = "Information Systems --- Information Storage and Retrieval --- General (H.3.0); Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3); Information Systems --- Information Storage and Retrieval --- Digital Libraries (H.3.7); Information Systems --- Database Management --- Systems (H.2.4): {\bf Textual databases}; Information Systems --- Information Storage and Retrieval (H.3)", } @Article{Hjaltason:1999:DBS, author = "G{\'\i}sli R. Hjaltason and Hanan Samet", title = "Distance browsing in spatial databases", journal = j-TODS, volume = "24", number = "2", pages = "265--318", month = jun, year = "1999", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Oct 21 16:01:19 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1999-24-2/p265-hjaltason/", abstract = "We compare two different techniques for browsing through a collection of spatial objects stored in an R-tree spatial data structure on the basis of their distances from an arbitrary spatial query object. The conventional approach is one that makes use of a $k$-nearest neighbor algorithm where $k$ is known prior to the invocation of the algorithm. 
Thus if $ m > k $ neighbors are needed, the $k$-nearest neighbor algorithm has to be reinvoked for $m$ neighbors, thereby possibly performing some redundant computations. The second approach is incremental in the sense that having obtained the $k$ nearest neighbors, the $ k + 1 {\em st \/ } $ neighbor can be obtained without having to calculate the $ k + 1 $ nearest neighbors from scratch. The incremental approach is useful when processing complex queries where one of the conditions involves spatial proximity (e.g., the nearest city to Chicago with population greater than a million), in which case a query engine can make use of a pipelined strategy. We present a general incremental nearest neighbor algorithm that is applicable to a large class of hierarchical spatial data structures. This algorithm is adapted to the R-tree and its performance is compared to an existing $k$-nearest neighbor algorithm for R-trees [Roussopoulos et al. 1995]. Experiments show that the incremental nearest neighbor algorithm significantly outperforms the $k$-nearest neighbor algorithm for distance browsing queries in a spatial database that uses the R-tree as a spatial index. Moreover, the incremental nearest neighbor algorithm usually outperforms the $k$-nearest neighbor algorithm when applied to the $k$-nearest neighbor problem for the R-tree, although the improvement is not nearly as large as for distance browsing queries. In fact, we prove informally that at any step in its execution the incremental nearest neighbor algorithm is optimal with respect to the spatial data structure that is employed. Furthermore, based on some simplifying assumptions, we prove that in two dimensions the number of distance computations and leaf nodes accesses made by the algorithm for finding $k$ neighbors is $ O(k + \sqrt{k}) $.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "distance browsing; hierarchical spatial data structures; nearest neighbors; R-trees; ranking", subject = "Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Spatial databases and GIS}; Data --- Data Structures (E.1): {\bf Trees}", } @Article{Alagic:1999:TCO, author = "Suad Alagi{\'c}", title = "Type-checking {OQL} queries in the {ODMG} type systems", journal = j-TODS, volume = "24", number = "3", pages = "319--360", month = sep, year = "1999", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 26 08:44:02 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1999-24-3/p319-alagic/", abstract = "Several negative results are proved about the ability to type-check queries in the only existing proposed standard for object-oriented databases. The first of these negative results is that it is not possible to type-check OQL queries in the type system underlying the ODMG object model and its definition language ODL. The second negative result is that OQL queries cannot be type-checked in the type system of the Java binding of the ODMG standard either. A solution proposed in this paper is to extend the ODMG object model with explicit support for parametric polymorphism (universal type quantification). These results show that Java cannot be a viable database programming language unless extended with parametric polymorphism. This is why type-checking OQL queries presents no problem for the type system of the C++ binding of the ODMG standard.
However, a type system that is strictly more powerful than any of the type systems of the ODMG standard is required in order to properly type ordered collections and indices. The required form of polymorphism is bounded type quantification (constrained genericity) and even F-bounded polymorphism. A further result is that neither static nor the standard dynamic object-oriented type-checking is possible for Java OQL, in spite of the fact that Java OQL combines features of two strongly and mostly statically-typed languages. Contrary to one of the promises of object-oriented database technology, this result shows that the impedance mismatch does not disappear in the ODMG standard. A type-safe reflective technique is proposed for overcoming this mismatch.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Standardization; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "C++; Java; ODMG standard; OQL; parametric polymorphism; type systems", subject = "Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Data description languages (DDL)}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Database (persistent) programming languages}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Object-oriented databases}; Software --- Programming Languages --- Language Classifications (D.3.2): {\bf Object-oriented languages}; Software --- Programming Languages --- Language Constructs and Features (D.3.3): {\bf Classes and objects}; Software --- Programming Languages --- Language Constructs and Features (D.3.3): {\bf Inheritance}; Software --- Programming Languages --- Language Constructs and Features (D.3.3): {\bf Polymorphism}", } @Article{Bozkaya:1999:ILM, author = "Tolga Bozkaya and Meral Ozsoyoglu", 
title = "Indexing large metric spaces for similarity search queries", journal = j-TODS, volume = "24", number = "3", pages = "361--404", month = sep, year = "1999", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 26 08:44:02 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1999-24-3/p361-bozkaya/", abstract = "One of the common queries in many database applications is finding approximate matches to a given query item from a collection of data items. For example, given an image database, one may want to retrieve all images that are similar to a given query image. Distance-based index structures are proposed for applications where the distance computations between objects of the data domain are expensive (such as high-dimensional data) and the distance function is metric. In this paper we consider using distance-based index structures for similarity queries on large metric spaces. We elaborate on the approach that uses reference points (vantage points) to partition the data space into spherical shell-like regions in a hierarchical manner. We introduce the multivantage point tree structure (mvp-tree) that uses more than one vantage point to partition the space into spherical cuts at each level. In answering similarity-based queries, the mvp-tree also utilizes the precomputed (at construction time) distances between the data points and the vantage points.\par We summarize the experiments comparing mvp-trees to vp-trees that have a similar partitioning strategy, but use only one vantage point at each level and do not make use of the precomputed distances. Empirical studies show that the mvp-tree outperforms the vp-tree by 20\% to 80\% for varying query ranges and different distance distributions. 
Next, we generalize the idea of using multiple vantage points and discuss the results of experiments we have made to see how varying the number of vantage points in a node affects performance and how much is gained in performance by making use of precomputed distances. The results show that, after all, it may be best to use a large number of vantage points in an internal node in order to end up with a single directory node and keep as many of the precomputed distances as possible to provide more efficient filtering during search operations. Finally, we provide some experimental results that compare mvp-trees with M-trees, which is a dynamic distance-based index structure for metric domains.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Experimentation; Measurement; Performance; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", subject = "Data --- Data Structures (E.1): {\bf Trees}; Information Systems --- Information Storage and Retrieval --- Content Analysis and Indexing (H.3.1): {\bf Indexing methods}; Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3): {\bf Search process}", } @Article{Casati:1999:SIE, author = "Fabio Casati and Stefano Ceri and Stefano Paraboschi and Giuseppe Pozzi", title = "Specification and implementation of exceptions in workflow management systems", journal = j-TODS, volume = "24", number = "3", pages = "405--451", month = sep, year = "1999", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 26 08:44:02 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1999-24-3/p405-casati/", abstract = "Although workflow management systems are most applicable when an organization follows
standard business processes and routines, any of these processes faces the need for handling exceptions, i.e., asynchronous and anomalous situations that fall outside the normal control flow.\par In this paper we concentrate upon anomalous situations that, although unusual, are part of the semantics of workflow applications, and should be specified and monitored coherently; in most real-life applications, such exceptions affect a significant fraction of workflow cases. However, very few workflow management systems are integrated with a highly expressive language for specifying this kind of exception and with a system component capable of handling it.\par We present Chimera-Exc, a language for the specification of exceptions for workflows based on detached active rules, and then describe the architecture of a system, called FAR, that implements Chimera-Exc and integrates it with a commercial workflow management system and database server. We discuss the main issues that were solved by our implementation, and report on the performance of FAR. We also discuss design criteria for exceptions in light of the formal properties of their execution. Finally, we focus on the portability of FAR on its unbundling to a generic architecture with detached active rules.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Languages; Management; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "active rules; asynchronous events; exceptions; workflow management systems", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Rule-based databases}; Information Systems --- Information Systems Applications --- General (H.4.0)", } @Article{Dey:1999:IDD, author = "Debabrata Dey and Veda C. Storey and Terence M. 
Barron", title = "Improving database design through the analysis of relationships", journal = j-TODS, volume = "24", number = "4", pages = "453--486", month = dec, year = "1999", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 26 08:44:02 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/1999-24-4/p453-dey/p453-dey.pdf; http://www.acm.org/pubs/citations/journals/tods/1999-24-4/p453-dey/", abstract = "Much of the work on conceptual modeling involves the use of an entity-relationship model in which binary relationships appear as associations between two entities. Relationships involving more than two entities are considered rare and, therefore, have not received adequate attention. This research provides a general framework for the analysis of relationships in which binary relationships simply become a special case. The framework helps a designer to identify ternary and other higher-degree relationships that are commonly represented, often inappropriately, as either entities or binary relationships. Generalized rules are also provided for representing higher-degree relationships in the relational model. This uniform treatment of relationships should significantly ease the burden on a designer by enabling him or her to extract more information from a real-world situation and represent it properly in a conceptual design.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "conceptual model; ER model; integrity constraint; min-max cardinality; relationship degree; weak relationship", subject = "Information Systems --- Models and Principles --- General (H.1.0); Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Security, integrity, and protection}", } @Article{Muralidhar:1999:SRD, author = "Krishnamurty Muralidhar and Rathindra Sarathy", title = "Security of random data perturbation methods", journal = j-TODS, volume = "24", number = "4", pages = "487--493", month = dec, year = "1999", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 26 08:44:02 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1999-24-4/p487-muralidhar/", abstract = "Statistical databases often use random data perturbation (RDP) methods to protect against disclosure of confidential numerical attributes. One of the key requirements of RDP methods is that they provide the appropriate level of security against snoopers who attempt to obtain information on confidential attributes through statistical inference. In this study, we evaluate the security provided by three methods of perturbation. The results of this study allow the database administrator to select the most effective RDP method that assures adequate protection against disclosure of confidential information.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Measurement; Security", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "bias; covariance; noise addition; random data perturbation", subject = "Information Systems --- Database Management --- Database Administration (H.2.7): {\bf Security, integrity, and protection}; Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}", } @Article{Wand:1999:OAR, author = "Yair Wand and Veda C. Storey and Ron Weber", title = "An ontological analysis of the relationship construct in conceptual modeling", journal = j-TODS, volume = "24", number = "4", pages = "494--528", month = dec, year = "1999", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 26 08:44:02 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1999-24-4/p494-wand/", abstract = "Conceptual models or semantic data models were developed to capture the meaning of an application domain as perceived by someone. Moreover, concepts employed in semantic data models have recently been adopted in object-oriented approaches to systems analysis and design. To employ conceptual modeling constructs effectively, their meanings have to be defined rigorously. Often, however, rigorous definitions of these constructs are missing. This situation occurs especially in the case of the relationship construct. Empirical evidence shows that use of relationships is often problematical as a way of communicating the meaning of an application domain. For example, users of conceptual modeling methodologies are frequently confused about whether to show an association between things via a relationship, an entity, or an attribute. 
Because conceptual models are intended to capture knowledge about a real-world domain, we take the view that the meaning of modeling constructs should be sought in models of reality. Accordingly, we use ontology, which is the branch of philosophy dealing with models of reality, to analyze the meaning of common conceptual modeling constructs. Our analysis provides a precise definition of several conceptual modeling constructs. Based on our analysis, we derive rules for the use of relationships in entity-relationship conceptual modeling. Moreover, we show how the rules resolve ambiguities that exist in current practice and how they can enrich the capacity of an entity-relationship conceptual model to capture knowledge about an application domain.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "conceptual modeling; database design; entity-relationship model; object-oriented modeling; ontology; semantic data modeling", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Computing Milieux --- Management of Computing and Information Systems --- Project and People Management (K.6.1): {\bf Systems analysis and design}", } @Article{Yan:1999:SID, author = "Tak W. Yan and Hector Garcia-Molina", title = "The {SIFT} information dissemination system", journal = j-TODS, volume = "24", number = "4", pages = "529--565", month = dec, year = "1999", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 26 08:44:02 MDT 2000", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/1999-24-4/p529-yan/", abstract = "Information dissemination is a powerful mechanism for finding information in wide-area environments. 
An information dissemination server accepts long-term user queries, collects new documents from information sources, matches the documents against the queries, and continuously updates the users with relevant information. This paper is a retrospective of the Stanford Information Filtering Service (SIFT), a system that as of April 1996 was processing over 40,000 worldwide subscriptions and over 80,000 daily documents. The paper describes some of the indexing mechanisms that were developed for SIFT, as well as the evaluations that were conducted to select a scheme to implement. It also describes the implementation of SIFT, and experimental results for the actual system. Finally, it also discusses and experimentally evaluates techniques for distributing a service such as SIFT for added performance and availability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Boolean queries; dissemination; filtering; indexing; vector space queries", subject = "Information Systems --- Database Management --- Systems (H.2.4); Information Systems --- Information Storage and Retrieval --- Information Storage (H.3.2); Information Systems --- Information Storage and Retrieval --- Information Search and Retrieval (H.3.3); Information Systems --- Information Storage and Retrieval --- Systems and Software (H.3.4)", } @Article{Morris:19xx:DON, author = "K. Morris and J. D. Ullman and A. {Van Gelder}", title = "Design Overview of the {NAIL!} System", journal = j-TODS, volume = "??", number = "??", pages = "??--??", month = "????", year = "19xx", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibsource = "Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Also published in/as: Proc.
of International Conference on Logic Programming, BCS 3, 1986. Also published in/as: Stanford Un., CSD, TR-CS-86-1108.", annote = "The {NAIL!} System seems to be a much more powerful query language than the ones commercially available today. It adds the power and dexterity of Prolog-like logic to standard query techniques. The {NAIL!} System exhibits a tendency to swing the database community from object-oriented query languages back to value-oriented query languages. There seemed to be ambiguity as to how to handle recursive rules. The paper gave some techniques but didn't prefer one over the other. Overall, the {NAIL!} System appears to be a superior attempt at strengthening conventional database query operations.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Spiegler:19xx:DRA, author = "I. Spiegler and Y. Noff", title = "Dynamic Recovery as an Alternative to Data Base Restoration", journal = j-TODS, volume = "??", number = "??", pages = "??--??", month = "????", year = "19xx", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibsource = "Database/Wiederhold.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Submitted, March 1987.", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Guting:2000:FRQ, author = "Ralf Hartmut G{\"u}ting and Michael H. B{\"o}hlen and Martin Erwig and Christian S. Jensen and Nikos A. 
Lorentzos and Markus Schneider and Michalis Vazirgiannis", title = "A foundation for representing and querying moving objects", journal = j-TODS, volume = "25", number = "1", pages = "1--42", month = mar, year = "2000", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/2000-25-1/p1-guting/p1-guting.pdf; http://www.acm.org/pubs/citations/journals/tods/2000-25-1/p1-guting/", abstract = "Spatio-temporal databases deal with geometries changing over time. The goal of our work is to provide a DBMS data model and query language capable of handling such time-dependent geometries, including those changing continuously that describe {\em moving objects}. Two fundamental abstractions are {\em moving point\/} and {\em moving region}, describing objects for which only the time-dependent position, or position and extent, respectively, are of interest. We propose to present such time-dependent geometries as attribute data types with suitable operations, that is, to provide an abstract data type extension to a DBMS data model and query language. This paper presents a design of such a system of abstract data types. It turns out that besides the main types of interest, moving point and moving region, a relatively large number of auxiliary data types are needed. For example, one needs a line type to represent the projection of a moving point into the plane, or a ``moving real'' to represent the time-dependent distance of two points. 
It then becomes crucial to achieve (i) orthogonality in the design of the system, i.e., type constructors can be applied uniformly; (ii) genericity and consistency of operations, i.e., operations range over as many types as possible and behave consistently; and (iii) closure and consistency between structure and operations of nontemporal and related temporal types. Satisfying these goals leads to a simple and expressive system of abstract data types that may be integrated into a query language to yield a powerful language for querying spatio-temporal data, including moving objects. The paper formally defines the types and operations, offers detailed insight into the considerations that went into the design, and exemplifies the use of the abstract data types using SQL. The paper offers a precise and conceptually clean foundation for implementing a spatio-temporal DBMS extension.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "abstract data types; algebra; moving objects; moving point; moving region; spatio-temporal data types; spatio-temporal databases", subject = "Information Systems --- Database Management --- Languages (H.2.3): {\bf Query languages}; Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Spatial databases and GIS}", } @Article{Kossmann:2000:IDP, author = "Donald Kossmann and Konrad Stocker", title = "Iterative dynamic programming: a new class of query optimization algorithms", journal = j-TODS, volume = "25", number = "1", pages = "43--82", month = mar, year = "2000", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL =
"http://www.acm.org/pubs/articles/journals/tods/2000-25-1/p43-kossmann/p43-kossmann.pdf; http://www.acm.org/pubs/citations/journals/tods/2000-25-1/p43-kossmann/", abstract = "The query optimizer is one of the most important components of a database system. Most commercial query optimizers today are based on a dynamic-programming algorithm, as proposed in Selinger et al. [1979]. While this algorithm produces good optimization results (i.e., good plans), its high complexity can be prohibitive if complex queries need to be processed, new query execution techniques need to be integrated, or in certain programming environments (e.g., distributed database systems). In this paper, we present and thoroughly evaluate a new class of query optimization algorithms that are based on a principle that we call {\em iterative dynamic programming}, or IDP for short. IDP has several important advantages: First, IDP-algorithms produce the best plans of all known algorithms in situations in which dynamic programming is not viable because of its high complexity. Second, some IDP variants are adaptive and produce as good plans as dynamic programming if dynamic programming is viable and as good as possible plans if dynamic programming turns out to be not viable. Third, all IDP-algorithms can very easily be integrated into an existing optimizer which is based on dynamic programming.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "dynamic programming; greedy algorithm; iterative dynamic programming; plan evaluation function; query optimization; randomized optimization", subject = "Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2); Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Lerner:2000:MCT, author = "Barbara Staudt Lerner", title = "A model for compound type changes encountered in schema evolution", journal = j-TODS, volume = "25", number = "1", pages = "83--127", month = mar, year = "2000", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/2000-25-1/p83-lerner/p83-lerner.pdf; http://www.acm.org/pubs/citations/journals/tods/2000-25-1/p83-lerner/", abstract = "Schema evolution is a problem that is faced by long-lived data. When a schema changes, existing persistent data can become inaccessible unless the database system provides mechanisms to access data created with previous versions of the schema. Most existing systems that support schema evolution focus on changes local to individual types within the schema, thereby limiting the changes that the database maintainer can perform. We have developed a model of type changes involving multiple types. The model describes both type changes and their impact on data by defining derivation rules to initialize new data based on the existing data.
The derivation rules can describe local and nonlocal changes to types to capture the intent of a large class of type change operations. We have built a system called Tess (Type Evolution Software System) that uses this model to recognize type changes by comparing schemas and then produces a transformer that can update data in a database to correspond to a newer version of the schema.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Languages", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "persistent programming languages; schema evolution", subject = "Information Systems --- Database Management --- Miscellaneous (H.2.m); Information Systems --- Database Management --- Languages (H.2.3): {\bf Database (persistent) programming languages}; Software --- Software Engineering --- Distribution, Maintenance, and Enhancement (D.2.7): {\bf Restructuring, reverse engineering, and reengineering}", } @Article{Bohm:2000:CMQ, author = "Christian B{\"o}hm", title = "A cost model for query processing in high dimensional data spaces", journal = j-TODS, volume = "25", number = "2", pages = "129--178", month = jun, year = "2000", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/2000-25-2/p129-bohm/p129-bohm.pdf; http://www.acm.org/pubs/citations/journals/tods/2000-25-2/p129-bohm/", abstract = "During the last decade, multimedia databases have become increasingly important in many application areas such as medicine, CAD, geography, and molecular biology. An important research topic in multimedia databases is similarity search in large data sets. 
Most current approaches that address similarity search use the feature approach, which transforms important properties of the stored objects into points of a high-dimensional space (feature vectors). Thus, similarity search is transformed into a neighborhood search in feature space. Multidimensional index structures are usually applied when managing feature vectors. Query processing can be improved substantially with optimization techniques such as blocksize optimization, data space quantization, and dimension reduction. To determine optimal parameters, an accurate estimate of index-based query processing performance is crucial. In this paper we develop a cost model for index structures for point databases such as the R*-tree and the X-tree. It provides accurate estimates of the number of data page accesses for range queries and nearest-neighbor queries under a Euclidean metric and a maximum metric. The problems specific to high-dimensional data spaces, called boundary effects, are considered. The concept of the fractal dimension is used to take the effects of correlated data into account.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "cost model; multidimensional index", subject = "Information Systems --- Database Management --- Database Applications (H.2.8); Information Systems --- Information Storage and Retrieval --- Content Analysis and Indexing (H.3.1)", } @Article{Cui:2000:TLV, author = "Yingwei Cui and Jennifer Widom and Janet L.
Wiener", title = "Tracing the lineage of view data in a warehousing environment", journal = j-TODS, volume = "25", number = "2", pages = "179--227", month = jun, year = "2000", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/2000-25-2/p179-cui/p179-cui.pdf; http://www.acm.org/pubs/citations/journals/tods/2000-25-2/p179-cui/", abstract = "We consider the {\em view data lineage\/} problem in a warehousing environment: For a given data item in a materialized warehouse view, we want to identify the set of source data items that produced the view item. We formally define the lineage problem, develop lineage tracing algorithms for relational views with aggregation, and propose mechanisms for performing consistent lineage tracing in a multisource data warehousing environment. Our result can form the basis of a tool that allows analysts to browse warehouse data, select view tuples of interest, and then ``drill-through'' to examine the exact source tuples that produced the view tuples of interest.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data warehouse; derivation; lineage; materialized views", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Database Manager}", } @Article{Parsons:2000:EIT, author = "Jeffrey Parsons and Yair Wand", title = "Emancipating instances from the tyranny of classes in information modeling", journal = j-TODS, volume = "25", number = "2", pages = "228--268", month = jun, year = "2000", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/2000-25-2/p228-parsons/p228-parsons.pdf; http://www.acm.org/pubs/citations/journals/tods/2000-25-2/p228-parsons/", abstract = "Database design commonly assumes, explicitly or implicitly, that instances must belong to classes. This can be termed the {\em assumption of inherent classification}. We argue that the extent and complexity of problems in schema integration, schema evolution, and interoperability are, to a large degree, consequences of inherent classification. Furthermore, we make the case that the assumption of inherent classification violates philosophical and cognitive guidelines on classification and is, therefore, inappropriate in view of the role of data modeling in representing knowledge about application domains. \par As an alternative, we propose a layered approach to modeling in which information about instances is separated from any particular classification. 
Two data modeling layers are proposed: (1) an {\em instance model\/} consisting of an instance base (i.e., information about instances and properties) and operations to populate, use, and maintain it; and (2) a {\em class model\/} consisting of a class base (i.e., information about classes defined in terms of properties) and operations to populate, use, and maintain it. The two-layered model provides {\em class independence}. This is analogous to the arguments of data independence offered by the relational model in comparison to hierarchical and network models. We show that a two-layered approach yields several advantages. In particular, schema integration is shown to be partially an artifact of inherent classification that can be greatly simplified in designing a database based on a layered model; schema evolution is supported without the complexity of operations currently required by class-based models; and the difficulties associated with interoperability among heterogeneous databases are reduced because there is no need to agree on the semantics of classes among independent databases. We conclude by considering the adequacy of a two-layered approach, outlining possible implementation strategies, and drawing attention to some practical considerations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Management; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "classification; conceptual modeling; database design; interoperability; ontology; schema evolution; schema integration", subject = "Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Data models}; Information Systems --- Database Management --- Logical Design (H.2.1): {\bf Schema and subschema}; Information Systems --- Database Management --- Heterogeneous Databases (H.2.5): {\bf Data translation**}; Information Systems --- Database Management --- Heterogeneous Databases (H.2.5)", } @Article{Baralis:2000:AAS, author = "Elena Baralis and Jennifer Widom", title = "An algebraic approach to static analysis of active database rules", journal = j-TODS, volume = "25", number = "3", pages = "269--332", month = sep, year = "2000", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/2000-25-3/p269-baralis/", abstract = "Rules in active database systems can be very difficult to program due to the unstructured and unpredictable nature of rule processing. We provide static analysis techniques for predicting whether a given rule set is guaranteed to terminate and whether rule execution is confluent (guaranteed to have a unique final state). Our methods are based on previous techniques for analyzing rules in active database systems. We improve considerably on the previous techniques by providing analysis criteria that are much less conservative: our methods often determine that a rule set will terminate or is confluent when previous methods could not make this determination. 
Our improved analysis is based on a ``propagation'' algorithm, which uses an extended relational algebra to accurately determine when the action of one rule can affect the condition of another, and determine when rule actions commute. We consider both condition-action rules and event-condition-action-rules, making our approach widely applicable to relational active database rule languages and to the trigger language in the SQL:1999 standard.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "active database systems; confluence; database rule processing; database trigger processing; termination", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Rule-based databases}; Information Systems --- Database Management --- Languages (H.2.3): {\bf SQL}", } @Article{Kemme:2000:NAD, author = "Bettina Kemme and Gustavo Alonso", title = "A new approach to developing and implementing eager database replication protocols", journal = j-TODS, volume = "25", number = "3", pages = "333--379", month = sep, year = "2000", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/2000-25-3/p333-kemme/", abstract = "Database replication is traditionally seen as a way to increase the availability and performance of distributed databases. Although a large number of protocols providing data consistency and fault-tolerance have been proposed, few of these ideas have ever been used in commercial products due to their complexity and performance implications. 
Instead, current products allow inconsistencies and often resort to centralized approaches which eliminates some of the advantages of replication. As an alternative, we propose a suite of replication protocols that addresses the main problems related to database replication. On the one hand, our protocols maintain data consistency and the same transactional semantics found in centralized systems. On the other hand, they provide flexibility and reasonable performance. To do so, our protocols take advantage of the rich semantics of group communication primitives and the relaxed isolation guarantees provided by most databases. This allows us to eliminate the possibility of deadlocks, reduce the message overhead and increase performance. A detailed simulation study shows the feasibility of the approach and the flexibility with which different types of bottlenecks can be circumvented.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Management; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database replication; fault-tolerance; group communication; isolation levels; one-copy-serializability; replica control; total order multicast", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Concurrency}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Transaction processing}; Computer Systems Organization --- Computer-Communication Networks --- Distributed Systems (C.2.4); Computer Systems Organization --- Performance of Systems (C.4)", } @Article{Meo:2000:TDV, author = "Rosa Meo", title = "Theory of dependence values", journal = j-TODS, volume = "25", number = "3", pages = "380--406", month = sep, year = "2000", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)",
ISSN-L = "0362-5915", bibdate = "Sat Apr 14 10:34:48 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/citations/journals/tods/2000-25-3/p380-meo/", abstract = "A new model to evaluate dependencies in data mining problems is presented and discussed. The well-known concept of the association rule is replaced by the new definition of dependence value, which is a single real number uniquely associated with a given itemset. Knowledge of dependence values is sufficient to describe all the dependencies characterizing a given data mining problem. The dependence value of an itemset is the difference between the occurrence probability of the itemset and a corresponding ``maximum independence estimate.'' This can be determined as a function of joint probabilities of the subsets of the itemset being considered by maximizing a suitable entropy function. So it is possible to separate in an itemset of cardinality $k$ the dependence inherited from its subsets of cardinality ($ k - 1 $) and the specific inherent dependence of that itemset. The absolute value of the difference between the probability p($i$ ) of the event $i$ that indicates the presence of the itemset $ \{ a, b, \ldots {} \} $ and its maximum independence estimate is constant for any combination of values of $ Q a, b, \ldots {} Q $. In addition, the Boolean function specifying the combination of values for which the dependence is positive is a parity function. So the determination of such combinations is immediate. The model appears to be simple and powerful.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Experimentation; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "association rules; dependence rules; entropy; variables independence", subject = "Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Data mining}; Information Systems --- Database Management --- Database Applications (H.2.8): {\bf Statistical databases}; Information Systems --- Models and Principles --- Systems and Information Theory (H.1.1): {\bf Information theory}; Computing Methodologies --- Artificial Intelligence --- Knowledge Representation Formalisms and Methods (I.2.4)", } @Article{Bohlen:2000:TSM, author = "Michael H. B{\"o}hlen and Christian S. Jensen and Richard T. Snodgrass", title = "Temporal statement modifiers", journal = j-TODS, volume = "25", number = "4", pages = "407--456", year = "2000", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 26 08:20:52 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/2000-25-4/p407-bohlen/p407-bohlen.pdf; http://www.acm.org/pubs/citations/journals/tods/2000-25-4/p407-bohlen/", abstract = "A wide range of database applications manage time-varying data. Many temporal query languages have been proposed, each one the result of many carefully made yet subtly interacting design decisions. In this article we advocate a different approach to articulating a set of requirements, or desiderata, that directly imply the syntactic structure and core semantics of a temporal extension of an (arbitrary) nontemporal query language. These desiderata facilitate transitioning applications from a nontemporal query language and data model, which has received only scant attention thus far. 
\par The paper then introduces the notion of {\em statement modifiers\/} that provide a means of systematically adding temporal support to an existing query language. Statement modifiers apply to all query language statements, for example, queries, cursor definitions, integrity constraints, assertions, views, and data manipulation statements. We also provide a way to systematically add temporal support to an existing implementation. The result is a temporal query language syntax, semantics, and implementation that derives from first principles. \par We exemplify this approach by extending SQL-92 with statement modifiers. This extended language, termed ATSQL, is formally defined via a denotational-semantics-style mapping of temporal statements to expressions using a combination of temporal and conventional relational algebraic operators.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "ATSQL; statement modifiers; temporal databases", subject = "Information Systems --- Database Management --- Languages (H.2.3): {\bf Data manipulation languages (DML)}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Query processing}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}; Information Systems --- Database Management --- Languages (H.2.3): {\bf Data description languages (DDL)}", } @Article{Fegaras:2000:OOQ, author = "Leonidas Fegaras and David Maier", title = "Optimizing object queries using an effective calculus", journal = j-TODS, volume = "25", number = "4", pages = "457--516", year = "2000", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 26 08:20:52 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = 
"http://www.acm.org/pubs/articles/journals/tods/2000-25-4/p457-fegaras/p457-fegaras.pdf; http://www.acm.org/pubs/citations/journals/tods/2000-25-4/p457-fegaras/", abstract = "Object-oriented databases (OODBs) provide powerful data abstractions and modeling facilities, but they generally lack a suitable framework for query processing and optimization. The development of an effective query optimizer is one of the key factors for OODB systems to successfully compete with relational systems, as well as to meet the performance requirements of many nontraditional applications. We propose an effective framework with a solid theoretical basis for optimizing OODB query languages. Our calculus, called the monoid comprehension calculus, captures most features of ODMG OQL, and is a good basis for expressing various optimization algorithms concisely. This article concentrates on query unnesting (also known as query decorrelation), an optimization that, even though it improves performance considerably, is not treated properly (if at all) by most OODB systems. Our framework generalizes many unnesting techniques proposed recently in the literature, and is capable of removing any form of query nesting using a very simple and efficient algorithm. The simplicity of our method is due to the use of the monoid comprehension calculus as an intermediate form for OODB queries. The monoid comprehension calculus treats operations over multiple collection types, aggregates, and quantifiers in a similar way, resulting in a uniform method of unnesting queries, regardless of their type of nesting.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "nested relations; object-oriented databases; query decorrelation; query optimization", subject = "Information Systems --- Database Management --- Logical Design (H.2.1); Information Systems --- Database Management --- Systems (H.2.4): {\bf Object-oriented databases}", } @Article{Kossmann:2000:CII, author = "Donald Kossmann and Michael J. Franklin and Gerhard Drasch and Wig Ag", title = "Cache investment: integrating query optimization and distributed data placement", journal = j-TODS, volume = "25", number = "4", pages = "517--558", year = "2000", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 26 08:20:52 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.acm.org/pubs/articles/journals/tods/2000-25-4/p517-kossmann/p517-kossmann.pdf; http://www.acm.org/pubs/citations/journals/tods/2000-25-4/p517-kossmann/", abstract = "Emerging distributed query-processing systems support flexible execution strategies in which each query can be run using a combination of data shipping and query shipping. As in any distributed environment, these systems can obtain tremendous performance and availability benefits by employing dynamic data caching. When flexible execution and dynamic caching are combined, however, a circular dependency arises: Caching occurs as a by-product of query operator placement, but query operator placement decisions are based on (cached) data location. The practical impact of this dependency is that query optimization decisions that appear valid on a per-query basis can actually cause suboptimal performance for all queries in the long run. 
\par To address this problem, we developed {\em Cache Investment\/} - a novel approach for integrating query optimization and data placement that looks beyond the performance of a single query. Cache Investment sometimes intentionally generates a ``suboptimal'' plan for a particular query in the interest of effecting a better data placement for subsequent queries. Cache Investment can be integrated into a distributed database system without changing the internals of the query optimizer. In this paper, we propose Cache Investment mechanisms and policies and analyze their performance. The analysis uses results from both an implementation on the SHORE storage manager and a detailed simulation model. Our results show that Cache Investment can significantly improve the overall performance of a system and demonstrate the trade-offs among various alternative policies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "cache investment; caching; client-server database systems; data shipping; dynamic data placement; query optimization; query shipping", subject = "Information Systems --- Database Management --- Systems (H.2.4): {\bf Distributed databases}; Information Systems --- Database Management --- Systems (H.2.4): {\bf Relational databases}", } @Article{Andries:2001:AUM, author = "Marc Andries and Luca Cabibbo and Jan Paredaens and Jan van den Bussche", title = "Applying an update method to a set of receivers", journal = j-TODS, volume = "26", number = "1", pages = "1--40", month = mar, year = "2001", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Feb 19 16:14:50 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM 
Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Dekhtyar:2001:PTD, author = "Alex Dekhtyar and Robert Ross and V. S. Subrahmanian", title = "Probabilistic temporal databases, {I}: algebra", journal = j-TODS, volume = "26", number = "1", pages = "41--95", month = mar, year = "2001", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Feb 19 16:14:50 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Hsu:2001:RBP, author = "Windsor W. Hsu and Alan Jay Smith and Honesty C. Young", title = "{I/O} reference behavior of production database workloads and the {TPC} benchmarks --- an analysis at the logical level", journal = j-TODS, volume = "26", number = "1", pages = "96--143", month = mar, year = "2001", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Feb 19 16:14:50 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Chomicki:2001:QAD, author = "Jan Chomicki and David Toman and Michael H. 
B{\"o}hlen", title = "Querying {ATSQL} databases with temporal logic", journal = j-TODS, volume = "26", number = "2", pages = "145--178", month = jun, year = "2001", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Feb 19 16:12:15 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fukuda:2001:DMO, author = "Takeshi Fukuda and Yasuhiko Morimoto and Shinichi Morishita and Takeshi Tokuyama", title = "Data mining with optimized two-dimensional association rules", journal = j-TODS, volume = "26", number = "2", pages = "179--213", month = jun, year = "2001", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Feb 19 16:12:15 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jajodia:2001:FSM, author = "Sushil Jajodia and Pierangela Samarati and Maria Luisa Sapino and V. S. Subrahmanian", title = "Flexible support for multiple access control policies", journal = j-TODS, volume = "26", number = "2", pages = "214--260", month = jun, year = "2001", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Feb 19 16:12:15 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Eiter:2001:POB, author = "Thomas Eiter and James J. Lu and Thomas Lukasiewicz and V. S. Subrahmanian", title = "Probabilistic object bases", journal = j-TODS, volume = "26", number = "3", pages = "264--312", month = sep, year = "2001", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Feb 19 16:12:15 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Li:2001:AQU, author = "Chen Li and Edward Chang", title = "Answering queries with useful bindings", journal = j-TODS, volume = "26", number = "3", pages = "313--343", month = sep, year = "2001", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Feb 19 16:12:15 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ng:2001:ERD, author = "Wilfred Ng", title = "An extension of the relational data model to incorporate ordered domains", journal = j-TODS, volume = "26", number = "3", pages = "344--383", month = sep, year = "2001", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Feb 19 16:12:15 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kotidis:2001:CDV, author = "Yannis Kotidis and Nick Roussopoulos", title = "A case for dynamic view management", journal = j-TODS, volume = "26", number = "4", pages = "388--423", month = dec, year = "2001", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Feb 19 16:12:16 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Mamoulis:2001:MSJ, author = "Nikos Mamoulis and Dimitris Papadias", title = "Multiway spatial joins", journal = j-TODS, volume = "26", number = "4", pages = "424--475", month = dec, year = "2001", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Feb 19 16:12:16 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Lakshmanan:2001:SES, author = "Laks V. S. Lakshmanan and Fereidoon Sadri and Subbu N. Subramanian", title = "{SchemaSQL}: {An} extension to {SQL} for multidatabase interoperability", journal = j-TODS, volume = "26", number = "4", pages = "476--519", month = dec, year = "2001", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Feb 19 16:12:16 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Suciu:2002:DQE, author = "Dan Suciu", title = "Distributed query evaluation on semistructured data", journal = j-TODS, volume = "27", number = "1", pages = "1--62", month = mar, year = "2002", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Nov 5 11:23:13 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Schuldt:2002:AIT, author = "Heiko Schuldt and Gustavo Alonso and Catriel Beeri and Hans-J{\"o}rg Schek", title = "Atomicity and isolation for transactional processes", journal = j-TODS, volume = "27", number = "1", pages = "63--116", month = mar, year = "2002", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Nov 5 11:23:13 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Liu:2002:LFD, author = "Mengchi Liu and Gillian Dobbie and Tok Wang Ling", title = "A logical foundation for deductive object-oriented databases", journal = j-TODS, volume = "27", number = "1", pages = "117--151", month = mar, year = "2002", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Nov 5 11:23:13 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bruno:2002:TSQ, author = "Nicolas Bruno and Surajit Chaudhuri and Luis Gravano", title = "Top-$k$ selection queries over relational databases: {Mapping} strategies and performance evaluation", journal = j-TODS, volume = "27", number = "2", pages = "153--187", month = jun, year = "2002", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Nov 5 11:23:13 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Chakrabarti:2002:LAD, author = "Kaushik Chakrabarti and Eamonn Keogh and Sharad Mehrotra and Michael Pazzani", title = "Locally adaptive dimensionality reduction for indexing large time series databases", journal = j-TODS, volume = "27", number = "2", pages = "188--228", month = jun, year = "2002", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Nov 5 11:23:13 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Calders:2002:SDM, author = "Toon Calders and Raymond T. Ng and Jef Wijsen", title = "Searching for dependencies at multiple abstraction levels", journal = j-TODS, volume = "27", number = "3", pages = "229--260", month = sep, year = "2002", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Nov 5 11:23:13 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Gibbons:2002:FIM, author = "Phillip B. 
Gibbons and Yossi Matias and Viswanath Poosala", title = "Fast incremental maintenance of approximate histograms", journal = j-TODS, volume = "27", number = "3", pages = "261--298", month = sep, year = "2002", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Nov 5 11:23:13 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Tao:2002:CMO, author = "Yufei Tao and Dimitris Papadias and Jun Zhang", title = "Cost models for overlapping and multiversion structures", journal = j-TODS, volume = "27", number = "3", pages = "299--342", month = sep, year = "2002", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Nov 5 11:23:13 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{May:2002:UGS, author = "Wolfgang May and Bertram Lud{\"a}scher", title = "Understanding the global semantics of referential actions using logic rules", journal = j-TODS, volume = "27", number = "4", pages = "343--397", month = dec, year = "2002", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Nov 5 11:23:14 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ciaccia:2002:SMS, author = "Paolo Ciaccia and Marco Patella", title = "Searching in metric spaces with user-defined and approximate distances", journal = j-TODS, volume = "27", number = "4", pages = "398--437", month = dec, year = "2002", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Nov 5 11:23:14 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fernandez:2002:SFP, author = "Mary Fern{\'a}ndez and Yana Kadiyska and Dan Suciu and Atsuyuki Morishima and Wang-Chiew Tan", title = "{SilkRoute}: a framework for publishing relational data in {XML}", journal = j-TODS, volume = "27", number = "4", pages = "438--493", month = dec, year = "2002", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Nov 5 11:23:14 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Beneventano:2003:DLS, author = "Domenico Beneventano and Sonia Bergamaschi and Claudio Sartori", title = "Description logics for semantic query optimization in object-oriented database systems", journal = j-TODS, volume = "28", number = "1", pages = "1--50", month = mar, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Aug 7 13:51:37 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Karp:2003:SAF, author = "Richard M. Karp and Scott Shenker and Christos H. Papadimitriou", title = "A simple algorithm for finding frequent elements in streams and bags", journal = j-TODS, volume = "28", number = "1", pages = "51--55", month = mar, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Aug 7 13:51:37 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Melnik:2003:AAS, author = "Sergey Melnik and Hector Garcia-Molina", title = "Adaptive algorithms for set containment joins", journal = j-TODS, volume = "28", number = "1", pages = "56--99", month = mar, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Aug 7 13:51:37 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Tao:2003:SQD, author = "Yufei Tao and Dimitris Papadias", title = "Spatial queries in dynamic environments", journal = j-TODS, volume = "28", number = "2", pages = "101--139", month = jun, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Aug 7 13:51:37 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Gunopulos:2003:DAM, author = "Dimitrios Gunopulos and Roni Khardon and Heikki Mannila and Sanjeev Saluja and Hannu Toivonen and Ram Sewak Sharma", title = "Discovering all most specific sentences", journal = j-TODS, volume = "28", number = "2", pages = "140--174", month = jun, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Aug 7 13:51:37 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Lechtenborger:2003:CRV, author = "Jens Lechtenb{\"o}rger and Gottfried Vossen", title = "On the computation of relational view complements", journal = j-TODS, volume = "28", number = "2", pages = "175--208", month = jun, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Aug 7 13:51:37 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Wijesekera:2003:RPF, author = "Duminda Wijesekera and Sushil Jajodia and Francesco Parisi-Presicce and {\AA}sa Hagstr{\"o}m", title = "Removing permissions in the flexible authorization framework", journal = j-TODS, volume = "28", number = "3", pages = "209--229", month = sep, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Oct 31 05:55:40 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jacox:2003:ISJ, author = "Edwin H. Jacox and Hanan Samet", title = "Iterative spatial join", journal = j-TODS, volume = "28", number = "3", pages = "230--256", month = sep, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Oct 31 05:55:40 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jimenez-Peris:2003:QAD, author = "Ricardo Jim{\'e}nez-Peris and M. 
Pati{\~n}o-Mart{\'\i}nez and Gustavo Alonso and Bettina Kemme", title = "Are quorums an alternative for data replication?", journal = j-TODS, volume = "28", number = "3", pages = "257--294", month = sep, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Oct 31 05:55:40 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Tao:2003:APS, author = "Yufei Tao and Jimeng Sun and Dimitris Papadias", title = "Analysis of predictive spatio-temporal queries", journal = j-TODS, volume = "28", number = "4", pages = "295--336", month = dec, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Dec 13 18:01:35 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Lakshmanan:2003:EDM, author = "Laks V. S. Lakshmanan and Carson Kai-Sang Leung and Raymond T. Ng", title = "Efficient dynamic mining of constrained frequent sets", journal = j-TODS, volume = "28", number = "4", pages = "337--389", month = dec, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Dec 13 18:01:35 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cho:2003:EPR, author = "Junghoo Cho and Hector Garcia-Molina", title = "Effective page refresh policies for {Web} crawlers", journal = j-TODS, volume = "28", number = "4", pages = "390--426", month = dec, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Dec 13 18:01:35 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Chomicki:2003:PFR, author = "Jan Chomicki", title = "Preference formulas in relational queries", journal = j-TODS, volume = "28", number = "4", pages = "427--466", month = dec, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Dec 13 18:01:35 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Diao:2003:PSP, author = "Yanlei Diao and Mehmet Altinel and Michael J. 
Franklin and Hao Zhang and Peter Fischer", title = "Path sharing and predicate evaluation for high-performance {XML} filtering", journal = j-TODS, volume = "28", number = "4", pages = "467--516", month = dec, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Dec 13 18:01:35 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Hjaltason:2003:IDS, author = "Gisli R. Hjaltason and Hanan Samet", title = "Index-driven similarity search in metric spaces", journal = j-TODS, volume = "28", number = "4", pages = "517--580", month = dec, year = "2003", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Dec 13 18:01:35 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kolaitis:2004:F, author = "Phokion Kolaitis and Michael J. Franklin", title = "Foreword", journal = j-TODS, volume = "29", number = "1", pages = "1--1", month = mar, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Jul 10 10:03:25 MDT 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Buneman:2004:ASD, author = "Peter Buneman and Sanjeev Khanna and Keishi Tajima and Wang-Chiew Tan", title = "Archiving scientific data", journal = j-TODS, volume = "29", number = "1", pages = "2--42", month = mar, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Jul 10 10:03:25 MDT 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Garofalakis:2004:PWS, author = "Minos Garofalakis and Phillip B. Gibbons", title = "Probabilistic wavelet synopses", journal = j-TODS, volume = "29", number = "1", pages = "43--90", month = mar, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Jul 10 10:03:25 MDT 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Grust:2004:AXE, author = "Torsten Grust and Maurice {Van Keulen} and Jens Teubner", title = "Accelerating {XPath} evaluation in any {RDBMS}", journal = j-TODS, volume = "29", number = "1", pages = "91--131", month = mar, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Jul 10 10:03:25 MDT 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ross:2004:SCM, author = "Kenneth A. Ross", title = "Selection conditions in main memory", journal = j-TODS, volume = "29", number = "1", pages = "132--161", month = mar, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Jul 10 10:03:25 MDT 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Arasu:2004:CMR, author = "Arvind Arasu and Brian Babcock and Shivnath Babu and Jon McAlister and Jennifer Widom", title = "Characterizing memory requirements for queries over continuous data streams", journal = j-TODS, volume = "29", number = "1", pages = "162--194", month = mar, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Jul 10 10:03:25 MDT 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Arenas:2004:NFX, author = "Marcelo Arenas and Leonid Libkin", title = "A normal form for {XML} documents", journal = j-TODS, volume = "29", number = "1", pages = "195--232", month = mar, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Jul 10 10:03:25 MDT 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Amer-Yahia:2004:DAO, author = "Sihem Amer-Yahia and Sophie Cluet", title = "A declarative approach to optimize bulk loading into databases", journal = j-TODS, volume = "29", number = "2", pages = "233--281", month = jun, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Jul 10 10:03:25 MDT 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Sadri:2004:EOS, author = "Reza Sadri and Carlo Zaniolo and Amir Zarkesh and Jafar Adibi", title = "Expressing and optimizing sequence queries in database systems", journal = j-TODS, volume = "29", number = "2", pages = "282--318", month = jun, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Jul 10 10:03:25 MDT 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Marian:2004:ETQ, author = "Am{\'e}lie Marian and Nicolas Bruno and Luis Gravano", title = "Evaluating top-$k$ queries over {Web}-accessible databases", journal = j-TODS, volume = "29", number = "2", pages = "319--362", month = jun, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Jul 10 10:03:25 MDT 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Yu:2004:CAM, author = "Ting Yu and Divesh Srivastava and Laks V. S. Lakshmanan and H. V. Jagadish", title = "A compressed accessibility map for {XML}", journal = j-TODS, volume = "29", number = "2", pages = "363--402", month = jun, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Jul 10 10:03:25 MDT 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Datta:2004:PBA, author = "Anindya Datta and Kaushik Dutta and Helen Thomas and Debra Vandermeer and Krithi Ramamritham", title = "Proxy-based acceleration of dynamically generated content on the {World Wide Web}: {An} approach and implementation", journal = j-TODS, volume = "29", number = "2", pages = "403--443", month = jun, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Jul 10 10:03:25 MDT 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Vincent:2004:SFD, author = "Millist W. Vincent and Jixue Liu and Chengfei Liu", title = "Strong functional dependencies and their application to normal forms in {XML}", journal = j-TODS, volume = "29", number = "3", pages = "445--462", month = sep, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Nov 4 08:30:22 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Trajcevski:2004:MUM, author = "Goce Trajcevski and Ouri Wolfson and Klaus Hinrichs and Sam Chamberlain", title = "Managing uncertainty in moving objects databases", journal = j-TODS, volume = "29", number = "3", pages = "463--507", month = sep, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Nov 4 08:30:22 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Chaudhuri:2004:EPM, author = "Surajit Chaudhuri and Vivek Narasayya and Sunita Sarawagi", title = "Extracting predicates from mining models for efficient query evaluation", journal = j-TODS, volume = "29", number = "3", pages = "508--544", month = sep, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Nov 4 08:30:22 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Babu:2004:ECR, author = "Shivnath Babu and Utkarsh Srivastava and Jennifer Widom", title = "Exploiting $k$-constraints to reduce memory overhead in continuous queries over data streams", journal = j-TODS, volume = "29", number = "3", pages = "545--580", month = sep, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Nov 4 08:30:22 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ozsoyoglu:2004:QWM, author = "G{\"u}ltekin {\"O}zsoyo{\u{g}}lu and Ismail Seng{\"o}r Alting{\"o}vde and Abdullah Al-Hamdani and Selma Ay{\c{s}}e {\"O}zel and {\"O}zg{\"u}r Ulusoy and Zehra Meral {\"O}zsoyo{\u{g}}lu", title = "Querying {Web} metadata: {Native} score management and text support in databases", journal = j-TODS, volume = "29", number = "4", pages = "581--634", month = dec, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Apr 14 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Baralis:2004:ECR, author = "Elena Baralis and Silvia Chiusano", title = "Essential classification rule sets", journal = j-TODS, volume = "29", number = "4", pages = "635--674", month = dec, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Apr 14 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Chen:2004:MBV, author = "Songting Chen and Bin Liu and Elke A. Rundensteiner", title = "Multiversion-based view maintenance over distributed data sources", journal = j-TODS, volume = "29", number = "4", pages = "675--709", month = dec, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Apr 14 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Balmin:2004:IVX, author = "Andrey Balmin and Yannis Papakonstantinou and Victor Vianu", title = "Incremental validation of {XML} documents", journal = j-TODS, volume = "29", number = "4", pages = "710--751", month = dec, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Apr 14 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Green:2004:PXS, author = "Todd J. Green and Ashish Gupta and Gerome Miklau and Makoto Onizuka and Dan Suciu", title = "Processing {XML} streams with deterministic automata and stream indexes", journal = j-TODS, volume = "29", number = "4", pages = "752--788", month = dec, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Apr 14 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Samet:2004:DPG, author = "Hanan Samet", title = "Decoupling partitioning and grouping: {Overcoming} shortcomings of spatial indexing with bucketing", journal = j-TODS, volume = "29", number = "4", pages = "789--830", month = dec, year = "2004", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Apr 14 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Milo:2005:EIX, author = "Tova Milo and Serge Abiteboul and Bernd Amann and Omar Benjelloun and Fred Dang Ngoc", title = "Exchanging intensional {XML} data", journal = j-TODS, volume = "30", number = "1", pages = "1--40", month = mar, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat May 7 08:01:30 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Papadias:2005:PSC, author = "Dimitris Papadias and Yufei Tao and Greg Fu and Bernhard Seeger", title = "Progressive skyline computation in database systems", journal = j-TODS, volume = "30", number = "1", pages = "41--82", month = mar, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat May 7 08:01:30 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Witkowski:2005:ASM, author = "Andrew Witkowski and Srikanth Bellamkonda and Tolga Bozkaya and Nathan Folkert and Abhinav Gupta and John Haydu and Lei Sheng and Sankar Subramanian", title = "Advanced {SQL} modeling in {RDBMS}", journal = j-TODS, volume = "30", number = "1", pages = "83--121", month = mar, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat May 7 08:01:30 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Madden:2005:TAQ, author = "Samuel R. Madden and Michael J. Franklin and Joseph M. 
Hellerstein and Wei Hong", title = "{TinyDB}: an acquisitional query processing system for sensor networks", journal = j-TODS, volume = "30", number = "1", pages = "122--173", month = mar, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat May 7 08:01:30 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fagin:2005:DEG, author = "Ronald Fagin and Phokion G. Kolaitis and Lucian Popa", title = "Data exchange: getting to the core", journal = j-TODS, volume = "30", number = "1", pages = "174--210", month = mar, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat May 7 08:01:30 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Pu:2005:CDS, author = "Ken Q. Pu and Alberto O. Mendelzon", title = "Concise descriptions of subsets of structured sets", journal = j-TODS, volume = "30", number = "1", pages = "211--248", month = mar, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat May 7 08:01:30 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cormode:2005:WHW, author = "Graham Cormode and S. Muthukrishnan", title = "What's hot and what's not: tracking most frequent items dynamically", journal = j-TODS, volume = "30", number = "1", pages = "249--278", month = mar, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat May 7 08:01:30 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Garofalakis:2005:XSP, author = "Minos Garofalakis and Amit Kumar", title = "{XML} stream processing using tree-edit distance embeddings", journal = j-TODS, volume = "30", number = "1", pages = "279--332", month = mar, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat May 7 08:01:30 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Geerts:2005:TUB, author = "Floris Geerts and Bart Goethals and Jan {Van den Bussche}", title = "Tight upper bounds on the number of candidate patterns", journal = j-TODS, volume = "30", number = "2", pages = "333--363", month = jun, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 7 14:14:12 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jagadish:2005:IAB, author = "H. V. Jagadish and Beng Chin Ooi and Kian-Lee Tan and Cui Yu and Rui Zhang", title = "{iDistance}: {An} adaptive {B$^+$}-tree based indexing method for nearest neighbor search", journal = j-TODS, volume = "30", number = "2", pages = "364--397", month = jun, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 7 14:14:12 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Braga:2005:XXU, author = "Daniele Braga and Alessandro Campi and Stefano Ceri", title = "{{\em XQBE} ({\em XQ\/}uery {\em B\/}y {\em E\/}xample)}: a visual interface to the standard {XML} query language", journal = j-TODS, volume = "30", number = "2", pages = "398--443", month = jun, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 7 14:14:12 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Gottlob:2005:EAP, author = "Georg Gottlob and Christoph Koch and Reinhard Pichler", title = "Efficient algorithms for processing {XPath} queries", journal = j-TODS, volume = "30", number = "2", pages = "444--491", month = jun, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 7 14:14:12 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fekete:2005:MSI, author = "Alan Fekete and Dimitrios Liarokapis and Elizabeth O'Neil and Patrick O'Neil and Dennis Shasha", title = "Making snapshot isolation serializable", journal = j-TODS, volume = "30", number = "2", pages = "492--528", month = jun, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 7 14:14:12 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Papadias:2005:ANN, author = "Dimitris Papadias and Yufei Tao and Kyriakos Mouratidis and Chun Kit Hui", title = "Aggregate nearest neighbor queries in spatial databases", journal = j-TODS, volume = "30", number = "2", pages = "529--576", month = jun, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 7 14:14:12 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Peng:2005:XSX, author = "Feng Peng and Sudarshan S. 
Chawathe", title = "{XSQ}: a streaming {XPath} engine", journal = j-TODS, volume = "30", number = "2", pages = "577--623", month = jun, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 7 14:14:12 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Wyss:2005:RLM, author = "Catharine M. Wyss and Edward L. Robertson", title = "Relational languages for metadata integration", journal = j-TODS, volume = "30", number = "2", pages = "624--660", month = jun, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 7 14:14:12 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Zhang:2005:GMD, author = "Rui Zhang and Panos Kalnis and Beng Chin Ooi and Kian-Lee Tan", title = "Generalized multidimensional data mapping and query processing", journal = j-TODS, volume = "30", number = "3", pages = "661--697", month = sep, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Oct 12 07:55:28 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Pang:2005:IMS, author = "Chaoyi Pang and Guozhu Dong and Kotagiri Ramamohanarao", title = "Incremental maintenance of shortest distance and transitive closure in first-order logic and {SQL}", journal = j-TODS, volume = "30", number = "3", pages = "698--721", month = sep, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Oct 12 07:55:28 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Wijsen:2005:DRU, author = "Jef Wijsen", title = "Database repairing using updates", journal = j-TODS, volume = "30", number = "3", pages = "722--768", month = sep, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Oct 12 07:55:28 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Litwin:2005:LHA, author = "Witold Litwin and Rim Moussa and Thomas Schwarz", title = "{LH*RS}---a highly-available scalable distributed data structure", journal = j-TODS, volume = "30", number = "3", pages = "769--811", month = sep, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Oct 12 07:55:28 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{He:2005:STC, author = "Zhen He and Byung Suk Lee and Robert Snapp", title = "Self-tuning cost modeling of user-defined functions in an object-relational {DBMS}", journal = j-TODS, volume = "30", number = "3", pages = "812--853", month = sep, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Oct 12 07:55:28 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Hurtado:2005:CSI, author = "Carlos A. Hurtado and Claudio Gutierrez and Alberto O. 
Mendelzon", title = "Capturing summarizability with integrity constraints in {OLAP}", journal = j-TODS, volume = "30", number = "3", pages = "854--886", month = sep, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Oct 12 07:55:28 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Suciu:2005:F, author = "Dan Suciu and Gerhard Weikum", title = "Foreword", journal = j-TODS, volume = "30", number = "4", pages = "887--887", month = dec, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Feb 16 11:31:47 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Garofalakis:2005:WSG, author = "Minos Garofalakis and Amit Kumar", title = "Wavelet synopses for general error metrics", journal = j-TODS, volume = "30", number = "4", pages = "888--928", month = dec, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Feb 16 11:31:47 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Marx:2005:CX, author = "Maarten Marx", title = "Conditional {XPath}", journal = j-TODS, volume = "30", number = "4", pages = "929--959", month = dec, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Feb 16 11:31:47 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Yan:2005:GIB, author = "Xifeng Yan and Philip S. Yu and Jiawei Han", title = "Graph indexing based on discriminative frequent structure analysis", journal = j-TODS, volume = "30", number = "4", pages = "960--993", month = dec, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Feb 16 11:31:47 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fagin:2005:CSM, author = "Ronald Fagin and Phokion G. Kolaitis and Lucian Popa and Wang-Chiew Tan", title = "Composing schema mappings: {Second-order} dependencies to the rescue", journal = j-TODS, volume = "30", number = "4", pages = "994--1055", month = dec, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Feb 16 11:31:47 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bowman:2005:OQS, author = "Ivan T. Bowman and Kenneth Salem", title = "Optimization of query streams using semantic prefetching", journal = j-TODS, volume = "30", number = "4", pages = "1056--1101", month = dec, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Feb 16 11:31:47 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kaushik:2005:SQO, author = "Raghav Kaushik and Jeffrey F. Naughton and Raghu Ramakrishnan and Venkatesan T. Chakravarthy", title = "Synopses for query optimization: a space-complexity perspective", journal = j-TODS, volume = "30", number = "4", pages = "1102--1127", month = dec, year = "2005", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Feb 16 11:31:47 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Wu:2006:OBI, author = "Kesheng Wu and Ekow J. 
Otoo and Arie Shoshani", title = "Optimizing bitmap indices with efficient compression", journal = j-TODS, volume = "31", number = "1", pages = "1--38", month = mar, year = "2006", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri May 26 08:20:49 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Schneider:2006:TRB, author = "Markus Schneider and Thomas Behr", title = "Topological relationships between complex spatial objects", journal = j-TODS, volume = "31", number = "1", pages = "39--81", month = mar, year = "2006", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri May 26 08:20:49 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jaluta:2006:BTC, author = "Ibrahim Jaluta and Seppo Sippu and Eljas Soisalon-Soininen", title = "{B}-tree concurrency control and recovery in page-server database systems", journal = j-TODS, volume = "31", number = "1", pages = "82--132", month = mar, year = "2006", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri May 26 08:20:49 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Gray:2006:CTC, author = "Jim Gray and Leslie Lamport", title = "Consensus on transaction commit", journal = j-TODS, volume = "31", number = "1", pages = "133--160", month = mar, year = "2006", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri May 26 08:20:49 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Guha:2006:IXD, author = "Sudipto Guha and H. V. Jagadish and Nick Koudas and Divesh Srivastava and Ting Yu", title = "Integrating {XML} data sources using approximate joins", journal = j-TODS, volume = "31", number = "1", pages = "161--207", month = mar, year = "2006", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri May 26 08:20:49 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Abiteboul:2006:RQX, author = "Serge Abiteboul and Luc Segoufin and Victor Vianu", title = "Representing and querying {XML} with incomplete information", journal = j-TODS, volume = "31", number = "1", pages = "208--254", month = mar, year = "2006", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri May 26 08:20:49 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Pelanis:2006:IPP, author = "Mindaugas Pelanis and Simonas {\v{S}}altenis and Christian S. Jensen", title = "Indexing the past, present, and anticipated future positions of moving objects", journal = j-TODS, volume = "31", number = "1", pages = "255--298", month = mar, year = "2006", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri May 26 08:20:49 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Rao:2006:SXD, author = "Praveen Rao and Bongki Moon", title = "Sequencing {XML} data and query twigs for fast pattern matching", journal = j-TODS, volume = "31", number = "1", pages = "299--345", month = mar, year = "2006", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri May 26 08:20:49 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{He:2006:ACS, author = "Bin He and Kevin Chen-Chuan Chang", title = "Automatic complex schema matching across {Web} query interfaces: a correlation mining approach", journal = j-TODS, volume = "31", number = "1", pages = "346--395", month = mar, year = "2006", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri May 26 08:20:49 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Guha:2006:ASA, author = "Sudipto Guha and Nick Koudas and Kyuseok Shim", title = "Approximation and streaming algorithms for histogram construction problems", journal = j-TODS, volume = "31", number = "1", pages = "396--438", month = mar, year = "2006", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri May 26 08:20:49 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Qian:2006:DIM, author = "Gang Qian and Qiang Zhu and Qiang Xue and Sakti Pramanik", title = "Dynamic indexing for multidimensional non-ordered discrete data spaces using a data-partitioning approach", journal = j-TODS, volume = "31", number = "2", pages = "439--484", month = jun, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1138394.1138395", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 14 10:11:33 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Iwerks:2006:MNS, author = "Glenn S. Iwerks and Hanan Samet and Kenneth P. 
Smith", title = "Maintenance of {$K$}-nn and spatial join queries on continuously moving points", journal = j-TODS, volume = "31", number = "2", pages = "485--536", month = jun, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1138394.1138396", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 14 10:11:33 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Pentaris:2006:QOD, author = "Fragkiskos Pentaris and Yannis Ioannidis", title = "Query optimization in distributed networks of autonomous database systems", journal = j-TODS, volume = "31", number = "2", pages = "537--583", month = jun, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1138394.1138397", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 14 10:11:33 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Lin:2006:SLT, author = "Xuemin Lin and Qing Liu and Yidong Yuan and Xiaofang Zhou and Hongjun Lu", title = "Summarizing level-two topological relations in large spatial datasets", journal = j-TODS, volume = "31", number = "2", pages = "584--630", month = jun, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1138394.1138398", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 14 10:11:33 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bright:2006:APB, author = "Laura Bright and Avigdor Gal and Louiqa Raschid", title = "Adaptive pull-based policies for wide area data delivery", journal = j-TODS, volume = "31", number = "2", pages = "631--671", month = jun, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1138394.1138399", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 14 10:11:33 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cohen:2006:RQA, author = "Sara Cohen and Werner Nutt and Yehoshua Sagiv", title = "Rewriting queries with arbitrary aggregation functions using views", journal = j-TODS, volume = "31", number = "2", pages = "672--715", month = jun, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1138394.1138400", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 14 10:11:33 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kalashnikov:2006:DID, author = "Dmitri V. Kalashnikov and Sharad Mehrotra", title = "Domain-independent data cleaning via analysis of entity-relationship graph", journal = j-TODS, volume = "31", number = "2", pages = "716--767", month = jun, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1138394.1138401", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 14 10:11:33 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Eiter:2006:ISI, author = "Thomas Eiter and Leonid Libkin", title = "Introduction to special {ICDT} section", journal = j-TODS, volume = "31", number = "3", pages = "769--769", month = sep, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1166074.1166075", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 17 05:41:01 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Martens:2006:ECX, author = "Wim Martens and Frank Neven and Thomas Schwentick and Geert Jan Bex", title = "Expressiveness and complexity of {XML Schema}", journal = j-TODS, volume = "31", number = "3", pages = "770--813", month = sep, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1166074.1166076", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 17 05:41:01 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Shaft:2006:TNN, author = "Uri Shaft and Raghu Ramakrishnan", title = "Theory of nearest neighbors indexability", journal = j-TODS, volume = "31", number = "3", pages = "814--838", month = sep, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1166074.1166077", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 17 05:41:01 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Braganholo:2006:PFA, author = "Vanessa P. Braganholo and Susan B. Davidson and Carlos A. Heuser", title = "{PATAX{\'O}}: a framework to allow updates through {XML} views", journal = j-TODS, volume = "31", number = "3", pages = "839--886", month = sep, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1166074.1166078", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 17 05:41:01 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Harizopoulos:2006:IIC, author = "Stavros Harizopoulos and Anastassia Ailamaki", title = "Improving instruction cache performance in {OLTP}", journal = j-TODS, volume = "31", number = "3", pages = "887--920", month = sep, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1166074.1166079", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 17 05:41:01 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Shao:2006:TNV, author = "Feng Shao and Antal Novak and Jayavel Shanmugasundaram", title = "Triggers over nested views of relational data", journal = j-TODS, volume = "31", number = "3", pages = "921--967", month = sep, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1166074.1166080", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 17 05:41:01 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{May:2006:SQU, author = "Norman May and Sven Helmer and Guido Moerkotte", title = "Strategies for query unnesting in {XML} databases", journal = j-TODS, volume = "31", number = "3", pages = "968--1013", month = sep, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1166074.1166081", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 17 05:41:01 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Polyzotis:2006:XSX, author = "Neoklis Polyzotis and Minos Garofalakis", title = "{XSKETCH} synopses for {XML} data graphs", journal = j-TODS, volume = "31", number = "3", pages = "1014--1063", month = sep, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1166074.1166082", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 17 05:41:01 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Sugumaran:2006:RDO, author = "Vijayan Sugumaran and Veda C. 
Storey", title = "The role of domain ontologies in database design: {An} ontology management and conceptual modeling environment", journal = j-TODS, volume = "31", number = "3", pages = "1064--1094", month = sep, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1166074.1166083", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 17 05:41:01 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Metwally:2006:IES, author = "Ahmed Metwally and Divyakant Agrawal and Amr {El Abbadi}", title = "An integrated efficient solution for computing frequent and top-$k$ elements in data streams", journal = j-TODS, volume = "31", number = "3", pages = "1095--1133", month = sep, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1166074.1166084", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 17 05:41:01 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See comments in \cite{Liu:2010:CIE}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Chaudhuri:2006:PIR, author = "Surajit Chaudhuri and Gautam Das and Vagelis Hristidis and Gerhard Weikum", title = "Probabilistic information retrieval approach for ranking of database query results", journal = j-TODS, volume = "31", number = "3", pages = "1134--1168", month = sep, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1166074.1166085", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 17 05:41:01 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Calders:2006:EPA, author = "Toon Calders and Laks V. S. Lakshmanan and Raymond T. Ng and Jan Paredaens", title = "Expressive power of an algebra for data mining", journal = j-TODS, volume = "31", number = "4", pages = "1169--1214", month = dec, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1189769.1189770", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:31 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The relational data model has simple and clear foundations on which significant theoretical and systems research has flourished. By contrast, most research on data mining has focused on algorithmic issues. A major open question is: what's an appropriate foundation for data mining, which can accommodate disparate mining tasks? We address this problem by presenting a database model and an algebra for data mining. The database model is based on the 3W-model introduced by Johnson et al. [2000]. 
This model relied on black box mining operators. A main contribution of this article is to open up these black boxes, by using generic operators in a data mining algebra. Two key operators in this algebra are regionize, which creates regions (or models) from data tuples, and a restricted form of looping called mining loop. Then the resulting data mining algebra MA is studied and properties concerning expressive power and complexity are established. We present results in three directions: (1) expressiveness of the mining algebra; (2) relations with alternative frameworks, and (3) interactions between regionize and mining loop.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Algebra; data mining; expressive power", } @Article{Koch:2006:CNX, author = "Christoph Koch", title = "On the complexity of nonrecursive {XQuery} and functional query languages on complex values", journal = j-TODS, volume = "31", number = "4", pages = "1215--1256", month = dec, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1189769.1189771", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:31 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article studies the complexity of evaluating functional query languages for complex values such as monad algebra and the recursion-free fragment of XQuery. We show that monad algebra, with equality restricted to atomic values, is complete for the class TA[2 O ( n ), O ( n )] of problems solvable in linear exponential time with a linear number of alternations if the query is assumed to be part of the input. The monotone fragment of monad algebra with atomic value equality but without negation is NEXPTIME-complete. 
For monad algebra with deep value equality, that is, equality of complex values, we establish TA[2 O ( n ), O ( n )] lower and exponential-space upper bounds. We also study a fragment of XQuery, Core XQuery, that seems to incorporate all the features of a query language on complex values that are traditionally deemed essential. A close connection between monad algebra on lists and Core XQuery (with ``child'' as the only axis) is exhibited. The two languages are shown expressively equivalent up to representation issues. We show that Core XQuery is just as hard as monad algebra with respect to query and combined complexity. As Core XQuery is NEXPTIME-hard, the best-known techniques for processing such problems require exponential amounts of working memory and doubly exponential time in the worst case. We present a property of queries---the lack of a certain form of composition---that virtually all real-world XQueries have and that allows for query evaluation in PSPACE and thus singly exponential time. Still, we are able to show for an important special case---Core XQuery with equality testing restricted to atomic values---that the composition-free language is just as expressive as the language with composition. Thus, under widely-held complexity-theoretic assumptions, the language with composition is an exponentially more succinct version of the composition-free language.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Complex values; complexity; conservativity; expressiveness; monad algebra; nested-relational algebra; XML; XQuery", } @Article{Ilyas:2006:ARA, author = "Ihab F. Ilyas and Walid G. Aref and Ahmed K. Elmagarmid and Hicham G. 
Elmongui and Rahul Shah and Jeffrey Scott Vitter", title = "Adaptive rank-aware query optimization in relational databases", journal = j-TODS, volume = "31", number = "4", pages = "1257--1304", month = dec, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1189769.1189772", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:31 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Rank-aware query processing has emerged as a key requirement in modern applications. In these applications, efficient and adaptive evaluation of top- k queries is an integral part of the application semantics. In this article, we introduce a rank-aware query optimization framework that fully integrates rank-join operators into relational query engines. The framework is based on extending the System R dynamic programming algorithm in both enumeration and pruning. We define ranking as an interesting physical property that triggers the generation of rank-aware query plans. Unlike traditional join operators, optimizing for rank-join operators depends on estimating the input cardinality of these operators. We introduce a probabilistic model for estimating the input cardinality, and hence the cost of a rank-join operator. To our knowledge, this is the first effort in estimating the needed input size for optimal rank aggregation algorithms. Costing ranking plans is key to the full integration of rank-join operators in real-world query processing engines. Since optimal execution strategies picked by static query optimizers lose their optimality due to estimation errors and unexpected changes in the computing environment, we introduce several adaptive execution strategies for top- k queries that respond to these unexpected changes and costing errors. 
Our reactive reoptimization techniques change the execution plan at runtime to significantly enhance the performance of running queries. Since top- k query plans are usually pipelined and maintain a complex ranking state, altering the execution strategy of a running ranking query is an important and challenging task. We conduct an extensive experimental study to evaluate the performance of the proposed framework. The experimental results are twofold: (1) we show the effectiveness of our cost-based approach of integrating ranking plans in dynamic programming cost-based optimizers; and (2) we show a significant speedup (up to 300\%) when using our adaptive execution of ranking plans over the state-of-the-art mid-query reoptimization strategies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "adaptive processing; Advanced query processing; rank-aware optimization; ranking; top-k", } @Article{Jiao:2006:MSS, author = "Yishan Jiao", title = "Maintaining stream statistics over multiscale sliding windows", journal = j-TODS, volume = "31", number = "4", pages = "1305--1334", month = dec, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1189769.1189773", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:31 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In this article, we propose a new multiscale sliding window model which differentiates data items in different time periods of the data stream, based on a reasonable monotonicity of resolution assumption. Our model, as a well-motivated extension of the sliding window model, stands halfway between the traditional all-history and time-decaying models. 
We also present algorithms for estimating two significant data stream statistics--- $ F_0 $ and Jacard's similarity coefficient---with reasonable accuracies under the new model. Our algorithms use space logarithmic in the data stream size and linear in the number of windows; they support update time logarithmic in the number of windows and independent of the accuracy required. Our algorithms are easy to implement. Experimental results demonstrate the efficiencies of our algorithms. Our techniques apply to scenarios in which universe sampling is used.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Data stream; F 0; Jacard's similarity coefficient; multiscale sliding window model", } @Article{Pei:2006:TMS, author = "Jian Pei and Yidong Yuan and Xuemin Lin and Wen Jin and Martin Ester and Qing Liu and Wei Wang and Yufei Tao and Jeffrey Xu Yu and Qing Zhang", title = "Towards multidimensional subspace skyline analysis", journal = j-TODS, volume = "31", number = "4", pages = "1335--1381", month = dec, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1189769.1189774", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:31 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The skyline operator is important for multicriteria decision-making applications. Although many recent studies developed efficient methods to compute skyline objects in a given space, none of them considers skylines in multiple subspaces simultaneously. More importantly, the fundamental problem on the semantics of skylines remains open: Why and in which subspaces is (or is not) an object in the skyline? Practically, users may also be interested in the skylines in any subspaces. 
Then, what is the relationship between the skylines in the subspaces and those in the super-spaces? How can we effectively analyze the subspace skylines? Can we efficiently compute skylines in various subspaces and answer various analytical queries? In this article, we tackle the problem of multidimensional subspace skyline computation and analysis. We explore skylines in subspaces. First, we propose the concept of Skycube, which consists of skylines of all possible nonempty subspaces of a given full space. Once a Skycube is materialized, any subspace skyline queries can be answered online. However, Skycube cannot fully address the semantic concerns and may contain redundant information. To tackle the problem, we introduce a novel notion of skyline group which essentially is a group of objects that coincide in the skylines of some subspaces. We identify the decisive subspaces that qualify skyline groups in the subspace skylines. The new notions concisely capture the semantics and the structures of skylines in various subspaces. Multidimensional roll-up and drill-down analysis is introduced. We also develop efficient algorithms to compute Skycube, skyline groups and their decisive subspaces. A systematic performance study using both real data sets and synthetic data sets is reported to evaluate our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data cubing; multidimensional data analysis; Skyline query", } @Article{Jermaine:2006:SMS, author = "Christopher Jermaine and Alin Dobra and Subramanian Arumugam and Shantanu Joshi and Abhijit Pol", title = "The {Sort-Merge-Shrink} join", journal = j-TODS, volume = "31", number = "4", pages = "1382--1416", month = dec, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1189769.1189775", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:31 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "One of the most common operations in analytic query processing is the application of an aggregate function to the result of a relational join. We describe an algorithm called the Sort-Merge-Shrink (SMS) Join for computing the answer to such a query over large, disk-based input tables. The key innovation of the SMS join is that if the input data are clustered in a statistically random fashion on disk, then at all times, the join provides an online, statistical estimator for the eventual answer to the query as well as probabilistic confidence bounds. Thus, a user can monitor the progress of the join throughout its execution and stop the join when satisfied with the estimate's accuracy or run the algorithm to completion with a total time requirement that is not much longer than that of other common join algorithms. This contrasts with other online join algorithms, which either do not offer such statistical guarantees or can only offer guarantees so long as the input data can fit into main memory.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "nonparametric statistics; OLAP; Online algorithms", } @Article{Afrati:2006:FSS, author = "Foto Afrati and Jennifer Widom", title = "Foreword to special section on {SIGMOD\slash PODS} 2005", journal = j-TODS, volume = "31", number = "4", pages = "1417--1417", month = dec, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1189769.1189776", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:31 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Yan:2006:FBS, author = "Xifeng Yan and Feida Zhu and Philip S. Yu and Jiawei Han", title = "Feature-based similarity search in graph structures", journal = j-TODS, volume = "31", number = "4", pages = "1418--1453", month = dec, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1189769.1189777", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:31 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Similarity search of complex structures is an important operation in graph-related applications since exact matching is often too restrictive. In this article, we investigate the issues of substructure similarity search using indexed features in graph databases. By transforming the edge relaxation ratio of a query graph into the maximum allowed feature misses, our structural filtering algorithm can filter graphs without performing pairwise similarity computation. 
It is further shown that using either too few or too many features can result in poor filtering performance. Thus the challenge is to design an effective feature set selection strategy that could maximize the filtering capability. We prove that the complexity of optimal feature set selection is $ \Omega (2^m) $ in the worst case, where $m$ is the number of features for selection. In practice, we identify several criteria to build effective feature sets for filtering, and demonstrate that combining features with similar size and selectivity can improve the filtering and search performance significantly within a multifilter composition framework. The proposed feature-based filtering concept can be generalized and applied to searching approximate nonconsecutive sequences, trees, and other structured data as well.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "complexity; Graph database; index; similarity search", } @Article{Fuxman:2006:PDE, author = "Ariel Fuxman and Phokion G. Kolaitis and Ren{\'e}e J. Miller and Wang-Chiew Tan", title = "Peer data exchange", journal = j-TODS, volume = "31", number = "4", pages = "1454--1498", month = dec, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1189769.1189778", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:31 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In this article, we introduce and study a framework, called peer data exchange, for sharing and exchanging data between peers. This framework is a special case of a full-fledged peer data management system and a generalization of data exchange between a source schema and a target schema. 
The motivation behind peer data exchange is to model authority relationships between peers, where a source peer may contribute data to a target peer, specified using source-to-target constraints, and a target peer may use target-to-source constraints to restrict the data it is willing to receive, but cannot modify the data of the source peer. A fundamental algorithmic problem in this framework is that of deciding the existence of a solution: given a source instance and a target instance for a fixed peer data exchange setting, can the target instance be augmented in such a way that the source instance and the augmented target instance satisfy all constraints of the setting? We investigate the computational complexity of the problem for peer data exchange settings in which the constraints are given by tuple generating dependencies. We show that this problem is always in NP, and that it can be NP-complete even for ``acyclic'' peer data exchange settings. We also show that the data complexity of the certain answers of target conjunctive queries is in coNP, and that it can be coNP-complete even for ``acyclic'' peer data exchange settings. After this, we explore the boundary between tractability and intractability for deciding the existence of a solution and for computing the certain answers of target conjunctive queries. To this effect, we identify broad syntactic conditions on the constraints between the peers under which the existence-of-solutions problem is solvable in polynomial time. We also identify syntactic conditions between peer data exchange settings and target conjunctive queries that yield polynomial-time algorithms for computing the certain answers. For both problems, these syntactic conditions turn out to be tight, in the sense that minimal relaxations of them lead to intractability. Finally, we introduce the concept of a universal basis of solutions in peer data exchange and explore its properties.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "certain answers; conjunctive queries; Data exchange; data integration; metadata model management; schema mapping", } @Article{Cheng:2006:DMM, author = "David Cheng and Ravi Kannan and Santosh Vempala and Grant Wang", title = "A divide-and-merge methodology for clustering", journal = j-TODS, volume = "31", number = "4", pages = "1499--1525", month = dec, year = "2006", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1189769.1189779", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:31 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We present a divide-and-merge methodology for clustering a set of objects that combines a top-down ``divide'' phase with a bottom-up ``merge'' phase. In contrast, previous algorithms use either top-down or bottom-up methods to construct a hierarchical clustering or produce a flat clustering using local search (e.g., k -means). For the divide phase, which produces a tree whose leaves are the elements of the set, we suggest an efficient spectral algorithm. When the data is in the form of a sparse document-term matrix, we show how to modify the algorithm so that it maintains sparsity and runs in linear space. The merge phase quickly finds the optimal partition that respects the tree for many natural objective functions, for example, k -means, min-diameter, min-sum, correlation clustering, etc. We present a thorough experimental evaluation of the methodology. We describe the implementation of a meta-search engine that uses this methodology to cluster results from web searches. We also give comparative empirical results on several real datasets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Clustering; data mining; information retrieval", } @Article{Snodgrass:2007:ESV, author = "Richard T. Snodgrass", title = "Editorial: {Single}- versus double-blind reviewing", journal = j-TODS, volume = "32", number = "1", pages = "1:1--1:??", month = mar, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1206049.1206050", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:55 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This editorial analyzes from a variety of perspectives the controversial issue of single-blind versus double-blind reviewing. In single-blind reviewing, the reviewer is unknown to the author, but the identity of the author is known to the reviewer. Double-blind reviewing is more symmetric: The identity of the author and the reviewer are not revealed to each other. We first examine the significant scholarly literature regarding blind reviewing. We then list six benefits claimed for double-blind reviewing and 21 possible costs. To compare these benefits and costs, we propose a double-blind policy for TODS that attempts to minimize the costs while retaining the core benefit of fairness that double-blind reviewing provides, and evaluate that policy against each of the listed benefits and costs. Following that is a general discussion considering several questions: What does this have to do with TODS, does bias exist in computer science, and what is the appropriate decision procedure? We explore the ``knobs'' a policy design can manipulate to fine-tune a double-blind review policy. This editorial ends with a specific decision.", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Anonymous citation; blinding efficacy; double-blind review; gender bias; single-blind review; status bias", } @Article{Pourabbas:2007:EEJ, author = "Elaheh Pourabbas and Arie Shoshani", title = "Efficient estimation of joint queries from multiple {OLAP} databases", journal = j-TODS, volume = "32", number = "1", pages = "2:1--2:??", month = mar, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1206049.1206051", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:55 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Given an OLAP query expressed over multiple source OLAP databases, we study the problem of estimating the resulting OLAP target database. The problem arises when it is not possible to derive the result from a single database. The method we use is linear indirect estimation, commonly used for statistical estimation. We examine two obvious computational methods for computing such a target database, called the full cross-product (F) and preaggregation (P) methods. We study the accuracy and computational cost of these methods. While the F method provides a more accurate estimate, it is more expensive computationally than P. Our contribution is in proposing a third, new method, called the partial preaggregation method (PP), which is significantly less expensive than F, but just as accurate. We prove formally that the PP method yields the same results as the F method, and provide analytical and experimental results on the accuracy and computational benefits of the PP method.", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "multiple summary databases; OLAP; query estimation", } @Article{Olteanu:2007:FNS, author = "Dan Olteanu", title = "Forward node-selecting queries over trees", journal = j-TODS, volume = "32", number = "1", pages = "3:1--3:??", month = mar, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1206049.1206052", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:55 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Node-selecting queries over trees lie at the core of several important XML languages for the web, such as the node-selection language XPath, the query language XQuery, and the transformation language XSLT. The main syntactic constructs of such queries are the backward predicates, for example, ancestor and preceding, and the forward predicates, for example, descendant and following. Forward predicates are included in the depth-first, left-to-right preorder relation associated with the input tree, whereas backward predicates are included in the inverse of this preorder relation.\par This work is devoted to an expressiveness study of node-selecting queries with proven theoretical and practical applicability, especially in the field of query evaluation against XML streams. The main question it answers positively is whether, for each input query with forward and backward predicates, there exists an equivalent forward-only output query. This question is then positively answered for input and output queries of varying structural complexity, using LOGLIN and PSPACE reductions.\par Various existing applications based on the results of this work are reported, including query optimization and streamed evaluation.", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Expressiveness; rewriting; streams; XML; XPath", } @Article{Nash:2007:CMG, author = "Alan Nash and Philip A. Bernstein and Sergey Melnik", title = "Composition of mappings given by embedded dependencies", journal = j-TODS, volume = "32", number = "1", pages = "4:1--4:??", month = mar, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1206049.1206053", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:55 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Composition of mappings between schemas is essential to support schema evolution, data exchange, data integration, and other data management tasks. In many applications, mappings are given by embedded dependencies. In this article, we study the issues involved in composing such mappings.\par Our algorithms and results extend those of Fagin et al. [2004], who studied the composition of mappings given by several kinds of constraints. In particular, they proved that full source-to-target tuple-generating dependencies (tgds) are closed under composition, but embedded source-to-target tgds are not. They introduced a class of second-order constraints, SO tgds, that is closed under composition and has desirable properties for data exchange.\par We study constraints that need not be source-to-target and we concentrate on obtaining (first-order) embedded dependencies. As part of this study, we also consider full dependencies and second-order constraints that arise from Skolemizing embedded dependencies. 
For each of the three classes of mappings that we study, we provide: (a) an algorithm that attempts to compute the composition; and (b) sufficient conditions on the input mappings which guarantee that the algorithm will succeed.\par In addition, we give several negative results. In particular, we show that full and second-order dependencies that are not limited to be source-to-target are not closed under composition (for the latter, under the additional restriction that no new function symbols are introduced). Furthermore, we show that determining whether the composition can be given by these kinds of dependencies is undecidable.", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "database theory; Metadata management", } @Article{Hwang:2007:OTK, author = "Seung-won Hwang and Kevin Chen-chuan Chang", title = "Optimizing top-k queries for middleware access: a unified cost-based approach", journal = j-TODS, volume = "32", number = "1", pages = "5:1--5:??", month = mar, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1206049.1206054", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:55 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article studies optimizing top- k queries in middlewares. While many assorted algorithms have been proposed, none is generally applicable to a wide range of possible scenarios. Existing algorithms lack both the ``generality'' to support a wide range of access scenarios and the systematic ``adaptivity'' to account for runtime specifics. 
To fulfill this critical lacking, we aim at taking a cost-based optimization approach: By runtime search over a space of algorithms, cost-based optimization is general across a wide range of access scenarios, yet adaptive to the specific access costs at runtime. While such optimization has been taken for granted for relational queries from early on, it has been clearly lacking for ranked queries. In this article, we thus identify and address the barriers of realizing such a unified framework. As the first barrier, we need to define a ``comprehensive'' space encompassing all possibly optimal algorithms to search over. As the second barrier and a conflicting goal, such a space should also be ``focused'' enough to enable efficient search. For SQL queries that are explicitly composed of relational operators, such a space, by definition, consists of schedules of relational operators (or ``query plans''). In contrast, top- k queries do not have logical tasks, such as relational operators. We thus define the logical tasks of top- k queries as building blocks to identify a comprehensive and focused space for top- k queries. We then develop efficient search schemes over such space for identifying the optimal algorithm. Our study indicates that our framework not only unifies, but also outperforms existing algorithms specifically designed for their scenarios.", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "middlewares; Top-k query processing", } @Article{Ceri:2007:MCV, author = "Stefano Ceri and Francesco {Di Giunta} and Pier Luca Lanzi", title = "Mining constraint violations", journal = j-TODS, volume = "32", number = "1", pages = "6:1--6:??", month = mar, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1206049.1206055", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:55 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In this article, we introduce pseudoconstraints, a novel data mining pattern aimed at identifying rare events in databases. At first, we formally define pseudoconstraints using a probabilistic model and provide a statistical test to identify pseudoconstraints in a database. Then, we focus on a specific class of pseudoconstraints, named cycle pseudoconstraints, which often occur in databases. We define cycle pseudoconstraints in the context of the ER model and present an automatic method for detecting cycle pseudoconstraints from a relational database. Finally, we present an experiment to show cycle pseudoconstraints ``at work'' on real data.", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Deviation detection; probabilistic models; relational data mining", } @Article{Jacox:2007:SJT, author = "Edwin H.
Jacox and Hanan Samet", title = "Spatial join techniques", journal = j-TODS, volume = "32", number = "1", pages = "7:1--7:??", month = mar, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1206049.1206056", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:36:55 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A variety of techniques for performing a spatial join are reviewed. Instead of just summarizing the literature and presenting each technique in its entirety, distinct components of the different techniques are described and each is decomposed into an overall framework for performing a spatial join. A typical spatial join technique consists of the following components: partitioning the data, performing internal-memory spatial joins on subsets of the data, and checking if the full polygons intersect. Each technique is decomposed into these components and each component addressed in a separate section so as to compare and contrast similar aspects of each technique. The goal of this survey is to describe the algorithms within each component in detail, comparing and contrasting competing methods, thereby enabling further analysis and experimentation with each component and allowing the best algorithms for a particular situation to be built piecemeal, or, even better, enabling an optimizer to choose which algorithms to use.", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "External memory algorithms; plane-sweep; spatial join", } @Article{Athitsos:2007:QSE, author = "Vassilis Athitsos and Marios Hadjieleftheriou and George Kollios and Stan Sclaroff", title = "Query-sensitive embeddings", journal = j-TODS, volume = "32", number = "2", pages = "8:1--8:??", month = jun, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1242524.1242525", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:09 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A common problem in many types of databases is retrieving the most similar matches to a query object. Finding these matches in a large database can be too slow to be practical, especially in domains where objects are compared using computationally expensive similarity (or distance) measures. Embedding methods can significantly speed-up retrieval by mapping objects into a vector space, where distances can be measured rapidly using a Minkowski metric. In this article we present a novel way to improve embedding quality. In particular, we propose to construct embeddings that use a query-sensitive distance measure for the target space of the embedding. This distance measure is used to compare those vectors that the query and database objects are mapped to. The term ``query-sensitive'' means that the distance measure changes, depending on the current query object. We demonstrate theoretically that using a query-sensitive distance measure increases the modeling power of embeddings and allows them to capture more of the structure of the original space. We also demonstrate experimentally that query-sensitive embeddings can significantly improve retrieval performance. 
In experiments with an image database of handwritten digits and a time-series database, the proposed method outperforms existing state-of-the-art non-Euclidean indexing methods, meaning that it provides significantly better tradeoffs between efficiency and retrieval accuracy.", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Embedding methods; nearest-neighbor retrieval; non-Euclidean spaces; nonmetric spaces; similarity matching", } @Article{Chaudhuri:2007:OSS, author = "Surajit Chaudhuri and Gautam Das and Vivek Narasayya", title = "Optimized stratified sampling for approximate query processing", journal = j-TODS, volume = "32", number = "2", pages = "9:1--9:??", month = jun, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1242524.1242526", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:09 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The ability to approximately answer aggregation queries accurately and efficiently is of great benefit for decision support and data mining tools. In contrast to previous sampling-based studies, we treat the problem as an optimization problem where, given a workload of queries, we select a stratified random sample of the original data such that the error in answering the workload queries using the sample is minimized. A key novelty of our approach is that we can tailor the choice of samples to be robust, even for workloads that are ``similar'' but not necessarily identical to the given workload. Finally, our techniques recognize the importance of taking into account the variance in the data distribution in a principled manner. 
We show how our solution can be implemented on a database system, and present results of extensive experiments on Microsoft SQL Server that demonstrate the superior quality of our method compared to previous work.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "approximation; query processing; Random sampling", } @Article{Deligiannakis:2007:EWM, author = "Antonios Deligiannakis and Minos Garofalakis and Nick Roussopoulos", title = "Extended wavelets for multiple measures", journal = j-TODS, volume = "32", number = "2", pages = "10:1--10:??", month = jun, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1242524.1242527", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:09 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Several studies have demonstrated the effectiveness of the Haar wavelet decomposition as a tool for reducing large amounts of data down to compact wavelet synopses that can be used to obtain fast, accurate approximate answers to user queries. Although originally designed for minimizing the overall mean-squared (i.e., $ L^2 $-norm) error in the data approximation, recently proposed methods also enable the use of Haar wavelets in minimizing other error metrics, such as the relative error in data value reconstruction, which is arguably the most important for approximate query answers. Relatively little attention, however, has been paid to the problem of using wavelet synopses as an approximate query answering tool over complex tabular datasets containing multiple measures, such as those typically found in real-life OLAP applications. 
Existing decomposition approaches will either operate on each measure individually, or treat all measures as a vector of values and process them simultaneously. As we demonstrate in this article, these existing individual or combined storage approaches for the wavelet coefficients of different measures can easily lead to suboptimal storage utilization, resulting in drastically reduced accuracy for approximate query answers. To address this problem, in this work, we introduce the notion of an extended wavelet coefficient as a flexible, efficient storage method for wavelet coefficients over multimeasure data. We also propose novel algorithms for constructing effective (optimal or near-optimal) extended wavelet-coefficient synopses under a given storage constraint, for both sum-squared error and relative-error norms. Experimental results with both real-life and synthetic datasets validate our approach, demonstrating that our techniques consistently obtain significant gains in approximation accuracy compared to existing solutions.", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "approximate query processing; data synopses; Wavelets", } @Article{Rusu:2007:PRN, author = "Florin Rusu and Alin Dobra", title = "Pseudo-random number generation for sketch-based estimations", journal = j-TODS, volume = "32", number = "2", pages = "11:1--11:48", month = jun, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1242524.1242528", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:09 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The exact computation of aggregate queries, like the size of join of two relations, usually requires large amounts of memory (constrained in data-streaming) or communication (constrained in distributed computation) and large processing times. In this situation, approximation techniques with provable guarantees, like sketches, are one possible solution. The performance of sketches depends crucially on the ability to generate particular pseudo-random numbers. In this article we investigate both theoretically and empirically the problem of generating k -wise independent pseudo-random numbers and, in particular, that of generating 3- and 4-wise independent pseudo-random numbers that are fast range-summable (i.e., they can be summed in sublinear time). 
Our specific contributions are: (a) we provide a thorough comparison of the various pseudo-random number generating schemes; (b) we study both theoretically and empirically the fast range-summation property of 3- and 4-wise independent generating schemes; (c) we provide algorithms for the fast range-summation of two 3-wise independent schemes, BCH and extended Hamming; and (d) we show convincing theoretical and empirical evidence that the extended Hamming scheme performs as well as any 4-wise independent scheme for estimating the size of join of two relations using AMS sketches, even though it is only 3-wise independent. We use this scheme to generate estimators that significantly outperform state-of-the-art solutions for two problems, namely, size of spatial joins and selectivity estimation.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "approximate query processing; data synopses; fast range-summation; Sketches", } @Article{Mazeika:2007:ESA, author = "Arturas Mazeika and Michael H. B{\"o}hlen and Nick Koudas and Divesh Srivastava", title = "Estimating the selectivity of approximate string queries", journal = j-TODS, volume = "32", number = "2", pages = "12:1--12:??", month = jun, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1242524.1242529", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:09 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Approximate queries on string data are important due to the prevalence of such data in databases and various conventions and errors in string data. We present the VSol estimator, a novel technique for estimating the selectivity of approximate string queries. 
The VSol estimator is based on inverse strings and makes the performance of the selectivity estimator independent of the number of strings. To get inverse strings we decompose all database strings into overlapping substrings of length q (q-grams) and then associate each q-gram with its inverse string: the IDs of all strings that contain the q-gram. We use signatures to compress inverse strings, and clustering to group similar signatures.\par We study our technique analytically and experimentally. The space complexity of our estimator only depends on the number of neighborhoods in the database and the desired estimation error. The time to estimate the selectivity is independent of the number of database strings and linear with respect to the length of query string. We give a detailed empirical performance evaluation of our solution for synthetic and real-world datasets. We show that VSol is effective for large skewed databases of short strings.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Inverse strings; min-wise hash signatures; q-grams", } @Article{Zeng:2007:CCC, author = "Zhiping Zeng and Jianyong Wang and Lizhu Zhou and George Karypis", title = "Out-of-core coherent closed quasi-clique mining from large dense graph databases", journal = j-TODS, volume = "32", number = "2", pages = "13:1--13:??", month = jun, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1242524.1242530", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:09 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Due to the ability of graphs to represent more generic and more complicated relationships among different objects, graph mining has played a significant role in data mining, attracting increasing attention in the data mining community. In addition, frequent coherent subgraphs can provide valuable knowledge about the underlying internal structure of a graph database, and mining frequently occurring coherent subgraphs from large dense graph databases has witnessed several applications and received considerable attention in the graph mining community recently. In this article, we study how to efficiently mine the complete set of coherent closed quasi-cliques from large dense graph databases, which is an especially challenging task due to the fact that the downward-closure property no longer holds. By fully exploring some properties of quasi-cliques, we propose several novel optimization techniques which can prune the unpromising and redundant subsearch spaces effectively. Meanwhile, we devise an efficient closure checking scheme to facilitate the discovery of closed quasi-cliques only. 
Since large databases cannot be held in main memory, we also design an out-of-core solution with efficient index structures for mining coherent closed quasi-cliques from large dense graph databases. We call this Cocain*. Thorough performance study shows that Cocain* is very efficient and scalable for large dense graph databases.", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "coherent subgraph; frequent closed subgraph; Graph mining; out-of-core algorithm; quasi-clique", } @Article{Ipeirotis:2007:MMC, author = "Panagiotis G. Ipeirotis and Alexandros Ntoulas and Junghoo Cho and Luis Gravano", title = "Modeling and managing changes in text databases", journal = j-TODS, volume = "32", number = "3", pages = "14:1--14:??", month = aug, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1272743.1272744", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:57 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Large amounts of (often valuable) information are stored in web-accessible text databases. ``Metasearchers'' provide unified interfaces to query multiple such databases at once. For efficiency, metasearchers rely on succinct statistical summaries of the database contents to select the best databases for each query. So far, database selection research has largely assumed that databases are static, so the associated statistical summaries do not evolve over time. However, databases are rarely static and the statistical summaries that describe their contents need to be updated periodically to reflect content changes. In this article, we first report the results of a study showing how the content summaries of 152 real web databases evolved over a period of 52 weeks. 
Then, we show how to use ``survival analysis'' techniques in general, and Cox's proportional hazards regression in particular, to model database changes over time and predict when we should update each content summary. Finally, we exploit our change model to devise update schedules that keep the summaries up to date by contacting databases only when needed, and then we evaluate the quality of our schedules experimentally over real web databases.", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "distributed information retrieval; Metasearching; text database selection", } @Article{Tao:2007:RSM, author = "Yufei Tao and Xiaokui Xiao and Reynold Cheng", title = "Range search on multidimensional uncertain data", journal = j-TODS, volume = "32", number = "3", pages = "15:1--15:??", month = aug, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1272743.1272745", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:22 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In an uncertain database, every object o is associated with a probability density function, which describes the likelihood that o appears at each position in a multidimensional workspace. This article studies two types of range retrieval fundamental to many analytical tasks. Specifically, a nonfuzzy query returns all the objects that appear in a search region $ r_q $ with at least a certain probability $ t_q $. On the other hand, given an uncertain object $q$, fuzzy search retrieves the set of objects that are within distance $ \varepsilon_q $ from $q$ with no less than probability $ t_q $. 
The core of our methodology is a novel concept of ``probabilistically constrained rectangle'', which permits effective pruning\slash validation of nonqualifying\slash qualifying data. We develop a new index structure called the U-tree for minimizing the query overhead. Our algorithmic findings are accompanied with a thorough theoretical analysis, which reveals valuable insight into the problem characteristics, and mathematically confirms the efficiency of our solutions. We verify the effectiveness of the proposed techniques with extensive experiments.", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "range search; Uncertain databases", } @Article{Sinha:2007:MRB, author = "Rishi Rakesh Sinha and Marianne Winslett", title = "Multi-resolution bitmap indexes for scientific data", journal = j-TODS, volume = "32", number = "3", pages = "16:1--16:??", month = aug, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1272743.1272746", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:57 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The unique characteristics of scientific data and queries cause traditional indexing techniques to perform poorly on scientific workloads, occupy excessive space, or both. Refinements of bitmap indexes have been proposed previously as a solution to this problem. In this article, we describe the difficulties we encountered in deploying bitmap indexes with scientific data and queries from two real-world domains. In particular, previously proposed methods of binning, encoding, and compressing bitmap vectors either were quite slow for processing the large-range query conditions our scientists used, or required excessive storage space. 
Nor could the indexes easily be built or used on parallel platforms. In this article, we show how to solve these problems through the use of multi-resolution, parallelizable bitmap indexes, which support a fine-grained trade-off between storage requirements and query performance. Our experiments with large data sets from two scientific domains show that multi-resolution, parallelizable bitmap indexes occupy an acceptable amount of storage while improving range query performance by roughly a factor of 10, compared to a single-resolution bitmap index of reasonable size.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "bitmap index; parallel index; Query processing; scientific data management", } @Article{Chen:2007:IHJ, author = "Shimin Chen and Anastassia Ailamaki and Phillip B. Gibbons and Todd C. Mowry", title = "Improving hash join performance through prefetching", journal = j-TODS, volume = "32", number = "3", pages = "17:1--17:??", month = aug, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1272743.1272747", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:57 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Hash join algorithms suffer from extensive CPU cache stalls. This article shows that the standard hash join algorithm for disk-oriented databases (i.e. GRACE) spends over 80\% of its user time stalled on CPU cache misses, and explores the use of CPU cache prefetching to improve its cache performance. Applying prefetching to hash joins is complicated by the data dependencies, multiple code paths, and inherent randomness of hashing. We present two techniques, group prefetching and software-pipelined prefetching, that overcome these complications. 
These schemes achieve 1.29--4.04X speedups for the join phase and 1.37--3.49X speedups for the partition phase over GRACE and simple prefetching approaches. Moreover, compared with previous cache-aware approaches (i.e. cache partitioning), the schemes are at least 36\% faster on large relations and do not require exclusive use of the CPU cache to be effective. Finally, comparing the elapsed real times when disk I/Os are in the picture, our cache prefetching schemes achieve 1.12--1.84X speedups for the join phase and 1.06--1.60X speedups for the partition phase over the GRACE hash join algorithm.", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "CPU cache performance; CPU cache prefetching; group prefetching; Hash join; software-pipelined prefetching", } @Article{Cao:2007:SQO, author = "Bin Cao and Antonio Badia", title = "{SQL} query optimization through nested relational algebra", journal = j-TODS, volume = "32", number = "3", pages = "18:1--18:??", month = aug, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1272743.1272748", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:57 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Most research work on optimization of nested queries focuses on aggregate subqueries. In this article, we show that existing approaches are not adequate for nonaggregate subqueries, especially for those having multiple subqueries and certain comparison operators. We then propose a new efficient approach, the nested relational approach, based on the nested relational algebra. The nested relational approach treats all subqueries in a uniform manner, being able to deal with nested queries of any type and any level. 
We report on experimental work that confirms that existing approaches have difficulties dealing with nonaggregate subqueries, and that the nested relational approach offers better performance. We also discuss algebraic optimization rules for further optimizing the nested relational approach and the issue of integrating it into relational database systems.", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Nested queries; nested relational algebra; nonrelational query processing", } @Article{Mamoulis:2007:ETA, author = "Nikos Mamoulis and Man Lung Yiu and Kit Hung Cheng and David W. Cheung", title = "Efficient top-$k$ aggregation of ranked inputs", journal = j-TODS, volume = "32", number = "3", pages = "19:1--19:??", month = aug, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1272743.1272749", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:22 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A top- k query combines different rankings of the same set of objects and returns the k objects with the highest combined score according to an aggregate function. We bring to light some key observations, which impose two phases that any top- k algorithm, based on sorted accesses, should go through. Based on them, we propose a new algorithm, which is designed to minimize the number of object accesses, the computational cost, and the memory requirements of top- k search with monotone aggregate functions. We provide an analysis for its cost and show that it is always no worse than the baseline ``no random accesses'' algorithm in terms of computations, accesses, and memory required. 
As a side contribution, we perform a space analysis, which indicates the memory requirements of top- k algorithms that only perform sorted accesses. For the case, where the required space exceeds the available memory, we propose disk-based variants of our algorithm. We propose and optimize a multiway top- k join operator, with certain advantages over evaluation trees of binary top- k join operators. Finally, we define and study the computation of top- k cubes and the implementation of roll-up and drill-down operations in such cubes. Extensive experiments with synthetic and real data show that, compared to previous techniques, our method accesses fewer objects, while being orders of magnitude faster.", acknowledgement = ack-nhfb, articleno = "19", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "rank aggregation; Top- k queries", } @Article{Chaudhuri:2007:IAS, author = "Surajit Chaudhuri", title = "Introduction to {ACM SIGMOD} 2006 conference papers", journal = j-TODS, volume = "32", number = "4", pages = "20:1--20:??", month = nov, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1292609.1292610", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "20", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ipeirotis:2007:TQO, author = "Panagiotis G. 
Ipeirotis and Eugene Agichtein and Pranay Jain and Luis Gravano", title = "Towards a query optimizer for text-centric tasks", journal = j-TODS, volume = "32", number = "4", pages = "21:1--21:??", month = nov, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1292609.1292611", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Text is ubiquitous and, not surprisingly, many important applications rely on textual data for a variety of tasks. As a notable example, information extraction applications derive structured relations from unstructured text; as another example, focused crawlers explore the Web to locate pages about specific topics. Execution plans for text-centric tasks follow two general paradigms for processing a text database: either we can scan, or ``crawl,'' the text database or, alternatively, we can exploit search engine indexes and retrieve the documents of interest via carefully crafted queries constructed in task-specific ways. The choice between crawl- and query-based execution plans can have a substantial impact on both execution time and output ``completeness'' (e.g., in terms of recall). Nevertheless, this choice is typically ad hoc and based on heuristics or plain intuition. In this article, we present fundamental building blocks to make the choice of execution plans for text-centric tasks in an informed, cost-based way. Towards this goal, we show how to analyze query- and crawl-based plans in terms of both execution time and output completeness. We adapt results from random-graph theory and statistics to develop a rigorous cost model for the execution plans. Our cost model reflects the fact that the performance of the plans depends on fundamental task-specific properties of the underlying text databases. 
We identify these properties and present efficient techniques for estimating the associated parameters of the cost model. We also present two optimization approaches for text-centric tasks that rely on the cost-model parameters and select efficient execution plans. Overall, our optimization approaches help build efficient execution plans for a task, resulting in significant efficiency and output completeness benefits. We complement our results with a large-scale experimental evaluation for three important text-centric tasks and over multiple real-life data sets.", acknowledgement = ack-nhfb, articleno = "21", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "distributed information retrieval; focused crawling; information extraction; Metasearching; text database selection", } @Article{Petropoulos:2007:EIQ, author = "Michalis Petropoulos and Alin Deutsch and Yannis Papakonstantinou and Yannis Katsis", title = "Exporting and interactively querying {Web} service-accessed sources: {The CLIDE System}", journal = j-TODS, volume = "32", number = "4", pages = "22:1--22:??", month = nov, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1292609.1292612", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The CLIDE System assists the owners of sources that participate in Web service-based data publishing systems to publish a restricted set of parameterized queries over the schema of their sources and package them as WSDL services. The sources may be relational databases, which naturally have a schema, or ad hoc information/application systems whereas the owner publishes a virtual schema. 
CLIDE allows information clients to pose queries over the published schema and utilizes prior work on answering queries using views to answer queries that can be processed by combining and processing the results of one or more Web service calls. These queries are called feasible. Contrary to prior work, where infeasible queries are rejected without an explanatory feedback, leading the user into a frustrating trial-and-error cycle, CLIDE features a query formulation interface, which extends the QBE-like query builder of Microsoft's SQL Server with a color scheme that guides the user toward formulating feasible queries. CLIDE guarantees that the suggested query edit actions are complete (i.e., each feasible query can be built by following only suggestions), rapidly convergent (the suggestions are tuned to lead to the closest feasible completions of the query), and suitably summarized (at each interaction step, only a minimal number of actions needed to preserve completeness are suggested). We present the algorithms, implementation, and performance evaluation showing that CLIDE is a viable on-line tool.", acknowledgement = ack-nhfb, articleno = "22", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "limited access patterns; Middleware; query rewriting; Web services", } @Article{Sharfman:2007:GAM, author = "Izchak Sharfman and Assaf Schuster and Daniel Keren", title = "A geometric approach to monitoring threshold functions over distributed data streams", journal = j-TODS, volume = "32", number = "4", pages = "23:1--23:??", month = nov, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1292609.1292613", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Monitoring data streams in a distributed system is the focus of much research in recent years. Most of the proposed schemes, however, deal with monitoring simple aggregated values, such as the frequency of appearance of items in the streams. More involved challenges, such as the important task of feature selection (e.g., by monitoring the information gain of various features), still require very high communication overhead using naive, centralized algorithms.\par We present a novel geometric approach which reduces monitoring the value of a function (vis-{\`a}-vis a threshold) to a set of constraints applied locally on each of the streams. The constraints are used to locally filter out data increments that do not affect the monitoring outcome, thus avoiding unnecessary communication. As a result, our approach enables monitoring of arbitrary threshold functions over distributed data streams in an efficient manner.\par We present experimental results on real-world data which demonstrate that our algorithms are highly scalable, and considerably reduce communication load in comparison to centralized algorithms.", acknowledgement = ack-nhfb, articleno = "23", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Distributed monitoring", } @Article{VandenBussche:2007:IPS, author = "Jan {Van den Bussche}", title = "Introduction to the {PODS} 2006 special section", journal = j-TODS, volume = "32", number = "4", pages = "24:1--24:??", month = nov, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1292609.1292614", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "24", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fagin:2007:ISM, author = "Ronald Fagin", title = "Inverting schema mappings", journal = j-TODS, volume = "32", number = "4", pages = "25:1--25:??", month = nov, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1292609.1292615", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A schema mapping is a specification that describes how data structured under one schema (the source schema) is to be transformed into data structured under a different schema (the target schema). Although the notion of an inverse of a schema mapping is important, the exact definition of an inverse mapping is somewhat elusive. This is because a schema mapping may associate many target instances with each source instance, and many source instances with each target instance. 
Based on the notion that the composition of a mapping and its inverse is the identity, we give a formal definition for what it means for a schema mapping $ M \prime $ to be an inverse of a schema mapping $M$ for a class $S$ of source instances. We call such an inverse an $S$-inverse. A particular case of interest arises when $S$ is the class of all source instances, in which case an $S$-inverse is a global inverse. We focus on the important and practical case of schema mappings specified by source-to-target tuple-generating dependencies, and uncover a rich theory. When $S$ is specified by a set of dependencies with a finite chase, we show how to construct an $S$-inverse when one exists. In particular, we show how to construct a global inverse when one exists. Given $M$ and $ M \prime $, we show how to define the largest class $S$ such that $ M \prime $ is an $S$-inverse of $M$.", acknowledgement = ack-nhfb, articleno = "25", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "chase; computational complexity; Data exchange; data integration; dependencies; inverse; metadata model management; schema mapping; second-order logic", } @Article{Bender:2007:APM, author = "Michael A. Bender and Haodong Hu", title = "An adaptive packed-memory array", journal = j-TODS, volume = "32", number = "4", pages = "26:1--26:??", month = nov, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1292609.1292616", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The packed-memory array (PMA) is a data structure that maintains a dynamic set of $N$ elements in sorted order in a $ \Theta (N) $-sized array. 
The idea is to intersperse $ \Theta (N) $ empty spaces or gaps among the elements so that only a small number of elements need to be shifted around on an insert or delete. Because the elements are stored physically in sorted order in memory or on disk, the PMA can be used to support extremely efficient range queries. Specifically, the cost to scan $L$ consecutive elements is $ O(1 + L / B) $ memory transfers.\par This article gives the first adaptive packed-memory array (APMA), which automatically adjusts to the input pattern. Like the traditional PMA, any pattern of updates costs only $ O(\log_2 N) $ amortized element moves and $ O(1 + (\log_2 N) / B) $ amortized memory transfers per update. However, the APMA performs even better on many common input distributions achieving only $ O(\log N) $ amortized element moves and $ O(1 + (\log N) / B) $ amortized memory transfers. The article analyzes sequential inserts, where the insertions are to the front of the APMA, hammer inserts, where the insertions ``hammer'' on one part of the APMA, random inserts, where the insertions are after random elements in the APMA, and bulk inserts, where for constant $ \alpha \in [0, 1] $, $ N^\alpha $ elements are inserted after random elements in the APMA. The article then gives simulation results that are consistent with the asymptotic bounds. For sequential insertions of roughly 1.4 million elements, the APMA has four times fewer element moves per insertion than the traditional PMA and running times that are more than seven times faster.", acknowledgement = ack-nhfb, articleno = "26", ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "adaptive packed-memory array; cache oblivious; locality preserving; packed-memory array; range query; rebalance; sequential file maintenance; sequential scan; sparse array", } @Article{Ioannidis:2007:IES, author = "Yannis Ioannidis", title = "Introduction to the {EDBT} 2006 special section", journal = j-TODS, volume = "32", number = "4", pages = "27:1--27:??", month = nov, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1292609.1292617", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "27", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bruno:2007:PDR, author = "Nicolas Bruno and Surajit Chaudhuri", title = "Physical design refinement: {The} `merge-reduce' approach", journal = j-TODS, volume = "32", number = "4", pages = "28:1--28:??", month = nov, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1292609.1292618", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Physical database design tools rely on a DBA-provided workload to pick an ``optimal'' set of indexes and materialized views. Such tools allow either creating a new such configuration or adding new structures to existing ones. However, these tools do not provide adequate support for the incremental and flexible refinement of existing physical structures. 
Although such refinements are often very valuable for DBAs, a completely manual approach to refinement can lead to infeasible solutions (e.g., excessive use of space). In this article, we focus on the important problem of physical design refinement and propose a transformational architecture that is based upon two novel primitive operations, called merging and reduction. These operators help refine a configuration, treating indexes and materialized views in a unified way, as well as succinctly explain the refinement process to DBAs.", acknowledgement = ack-nhfb, articleno = "28", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Physical database design; physical design refinement; view merging and reduction", } @Article{Skopal:2007:UFF, author = "Tom{\'a}{\v{s}} Skopal", title = "Unified framework for fast exact and approximate search in dissimilarity spaces", journal = j-TODS, volume = "32", number = "4", pages = "29:1--29:??", month = nov, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1292609.1292619", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In multimedia systems we usually need to retrieve database (DB) objects based on their similarity to a query object, while the similarity assessment is provided by a measure which defines a (dis)similarity score for every pair of DB objects. In most existing applications, the similarity measure is required to be a metric, where the triangle inequality is utilized to speed up the search for relevant objects by use of metric access methods (MAMs), for example, the M-tree.
A recent research has shown, however, that nonmetric measures are more appropriate for similarity modeling due to their robustness and ease to model a made-to-measure similarity. Unfortunately, due to the lack of triangle inequality, the nonmetric measures cannot be directly utilized by MAMs. From another point of view, some sophisticated similarity measures could be available in a black-box nonanalytic form (e.g., as an algorithm or even a hardware device), where no information about their topological properties is provided, so we have to consider them as nonmetric measures as well. From yet another point of view, the concept of similarity measuring itself is inherently imprecise and we often prefer fast but approximate retrieval over an exact but slower one.\par To date, the mentioned aspects of similarity retrieval have been solved separately, that is, exact versus approximate search or metric versus nonmetric search. In this article we introduce a similarity retrieval framework which incorporates both of the aspects into a single unified model. Based on the framework, we show that for any dissimilarity measure (either a metric or nonmetric) we are able to change the ``amount'' of triangle inequality, and so obtain an approximate or full metric which can be used for MAM-based retrieval. Due to the varying ``amount'' of triangle inequality, the measure is modified in a way suitable for either an exact but slower or an approximate but faster retrieval. Additionally, we introduce the TriGen algorithm aimed at constructing the desired modification of any black-box distance automatically, using just a small fraction of the database.", acknowledgement = ack-nhfb, articleno = "29", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "approximate and exact search; Similarity retrieval", } @Article{Li:2007:NGN, author = "Yunyao Li and Huahai Yang and H. V. 
Jagadish", title = "{NaLIX}: a generic natural language search environment for {XML} data", journal = j-TODS, volume = "32", number = "4", pages = "30:1--30:??", month = nov, year = "2007", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1292609.1292620", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We describe the construction of a generic natural language query interface to an XML database. Our interface can accept a large class of English sentences as a query, which can be quite complex and include aggregation, nesting, and value joins, among other things. This query is translated, potentially after reformulation, into an XQuery expression. The translation is based on mapping grammatical proximity of natural language parsed tokens in the parse tree of the query sentence to proximity of corresponding elements in the XML data to be retrieved. Iterative search in the form of followup queries is also supported. Our experimental assessment, through a user study, demonstrates that this type of natural language interface is good enough to be usable now, with no restrictions on the application domain.", acknowledgement = ack-nhfb, articleno = "30", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "dialog system; iterative search; Natural language interface; XML; XQuery", } @Article{Hristidis:2008:ABK, author = "Vagelis Hristidis and Heasoo Hwang and Yannis Papakonstantinou", title = "Authority-based keyword search in databases", journal = j-TODS, volume = "33", number = "1", pages = "1:1--1:40", month = mar, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1331904.1331905", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:49 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Our system applies authority-based ranking to keyword search in databases modeled as labeled graphs. Three ranking factors are used: the relevance to the query, the specificity and the importance of the result. All factors are handled using authority-flow techniques that exploit the link-structure of the data graph, in contrast to traditional Information Retrieval. We address the performance challenges in computing the authority flows in databases by using precomputation and exploiting the database schema if present. We conducted user surveys and performance experiments on multiple real and synthetic datasets, to assess the semantic meaningfulness and performance of our system.", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Authority flow; PageRank; quality experiments; ranking; specificity", } @Article{Aggarwal:2008:SDM, author = "Charu C. Aggarwal and Philip S. 
Yu", title = "On static and dynamic methods for condensation-based privacy-preserving data mining", journal = j-TODS, volume = "33", number = "1", pages = "2:1--2:39", month = mar, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1331904.1331906", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:49 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In recent years, privacy-preserving data mining has become an important problem because of the large amount of personal data which is tracked by many business applications. In many cases, users are unwilling to provide personal information unless the privacy of sensitive information is guaranteed. In this paper, we propose a new framework for privacy-preserving data mining of multidimensional data. Previous work for privacy-preserving data mining uses a perturbation approach which reconstructs data distributions in order to perform the mining. Such an approach treats each dimension independently and therefore ignores the correlations between the different dimensions. In addition, it requires the development of a new distribution-based algorithm for each data mining problem, since it does not use the multidimensional records, but uses aggregate distributions of the data as input. This leads to a fundamental re-design of data mining algorithms. In this paper, we will develop a new and flexible approach for privacy-preserving data mining that does not require new problem-specific algorithms, since it maps the original data set into a new anonymized data set. These anonymized data closely match the characteristics of the original data including the correlations among the different dimensions. We will show how to extend the method to the case of data streams. We present empirical results illustrating the effectiveness of the method. 
We also show the efficiency of the method for data streams.", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "databases data mining; k -anonymity; Privacy", } @Article{Balazinska:2008:FTB, author = "Magdalena Balazinska and Hari Balakrishnan and Samuel R. Madden and Michael Stonebraker", title = "Fault-tolerance in the {Borealis} distributed stream processing system", journal = j-TODS, volume = "33", number = "1", pages = "3:1--3:44", month = mar, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1331904.1331907", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:49 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Over the past few years, Stream Processing Engines (SPEs) have emerged as a new class of software systems, enabling low latency processing of streams of data arriving at high rates. As SPEs mature and get used in monitoring applications that must continuously run (e.g., in network security monitoring), a significant challenge arises: SPEs must be able to handle various software and hardware faults that occur, masking them to provide high availability (HA). In this article, we develop, implement, and evaluate DPC (Delay, Process, and Correct), a protocol to handle crash failures of processing nodes and network failures in a distributed SPE.\par Like previous approaches to HA, DPC uses replication and masks many types of node and network failures. In the presence of network partitions, the designer of any replication system faces a choice between providing availability or data consistency across the replicas. 
In DPC, this choice is made explicit: the user specifies an availability bound (no result should be delayed by more than a specified delay threshold even under failure if the corresponding input is available), and DPC attempts to minimize the resulting inconsistency between replicas (not all of which might have seen the input data) while meeting the given delay threshold. Although conceptually simple, the DPC protocol tolerates the occurrence of multiple simultaneous failures as well as any further failures that occur during recovery.\par This article describes DPC and its implementation in the Borealis SPE. We show that DPC enables a distributed SPE to maintain low-latency processing at all times, while also achieving eventual consistency, where applications eventually receive the complete and correct output streams. Furthermore, we show that, independent of system size and failure location, it is possible to handle failures almost up-to the user-specified bound in a manner that meets the required availability without introducing any inconsistency.", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "availability; consistency; Distributed stream processing; fault-tolerance", } @Article{Fan:2008:IPX, author = "Wenfei Fan and Philip Bohannon", title = "Information preserving {XML} schema embedding", journal = j-TODS, volume = "33", number = "1", pages = "4:1--4:44", month = mar, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1331904.1331908", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:49 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A fundamental concern of data integration in an XML context is the ability to embed one or more source documents in a target document so that (a) the target document conforms to a target schema and (b) the information in the source documents is preserved. In this paper, information preservation for XML is formally studied, and the results of this study guide the definition of a novel notion of schema embedding between two XML DTD schemas represented as graphs. Schema embedding generalizes the conventional notion of graph similarity by allowing an edge in a source DTD schema to be mapped to a path in the target DTD. Instance-level embeddings can be derived from the schema embedding in a straightforward manner, such that conformance to a target schema and information preservation are guaranteed. We show that it is NP-complete to find an embedding between two DTD schemas. We also outline efficient heuristic algorithms to find candidate embeddings, which have proved effective by our experimental study. These yield the first systematic and effective approach to finding information preserving XML mappings.", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Data transformation; information integration; information preservation; schema embedding; schema mapping; XML; XSLT", } @Article{Sharaf:2008:AMP, author = "Mohamed A. Sharaf and Panos K. Chrysanthis and Alexandros Labrinidis and Kirk Pruhs", title = "Algorithms and metrics for processing multiple heterogeneous continuous queries", journal = j-TODS, volume = "33", number = "1", pages = "5:1--5:44", month = mar, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1331904.1331909", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jun 12 16:37:49 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The emergence of monitoring applications has precipitated the need for Data Stream Management Systems (DSMSs), which constantly monitor incoming data feeds (through registered continuous queries), in order to detect events of interest. In this article, we examine the problem of how to schedule multiple Continuous Queries (CQs) in a DSMS to optimize different Quality of Service (QoS) metrics. We show that, unlike traditional online systems, scheduling policies in DSMSs that optimize for average response time will be different from policies that optimize for average slowdown, which is a more appropriate metric to use in the presence of a heterogeneous workload. Towards this, we propose policies to optimize for the average-case performance for both metrics. Additionally, we propose a hybrid scheduling policy that strikes a fine balance between performance and fairness, by looking at both the average- and worst-case performance, for both metrics. We also show how our policies can be adaptive enough to handle the inherent dynamic nature of monitoring applications. 
Furthermore, we discuss how our policies can be efficiently implemented and extended to exploit sharing in optimized multi-query plans and multi-stream CQs. Finally, we experimentally show using real data that our policies consistently outperform currently used ones.", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "continuous queries; Data stream management system; operator scheduling", } @Article{Fan:2008:CFD, author = "Wenfei Fan and Floris Geerts and Xibei Jia and Anastasios Kementsietsidis", title = "Conditional functional dependencies for capturing data inconsistencies", journal = j-TODS, volume = "33", number = "2", pages = "6:1--6:48", month = jun, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1366102.1366103", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 25 08:39:17 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We propose a class of integrity constraints for relational databases, referred to as {\em conditional functional dependencies\/} (CFDs), and study their applications in data cleaning. In contrast to traditional functional dependencies (FDs) that were developed mainly for schema design, CFDs aim at capturing the consistency of data by enforcing bindings of semantically related values. For static analysis of CFDs we investigate {\em the consistency problem}, which is to determine whether or not there exists a nonempty database satisfying a given set of CFDs, and {\em the implication problem}, which is to decide whether or not a set of CFDs entails another CFD. 
We show that while any set of traditional FDs is trivially consistent, the consistency problem is NP-complete for CFDs, but it is in PTIME when either the database schema is predefined or no attributes involved in the CFDs have a finite domain. For the implication analysis of CFDs, we provide an inference system analogous to Armstrong's axioms for FDs, and show that the implication problem is coNP-complete for CFDs in contrast to the linear-time complexity for their traditional counterpart. We also present an algorithm for computing a minimal cover of a set of CFDs. Since CFDs allow data bindings, in some cases CFDs may be physically large, complicating the detection of constraint violations. We develop techniques for detecting CFD violations in SQL as well as novel techniques for checking multiple constraints by a single query. We also provide incremental methods for checking CFDs in response to changes to the database. We experimentally verify the effectiveness of our CFD-based methods for inconsistency detection. This work not only yields a constraint theory for CFDs but is also a step toward a practical constraint-based method for improving data quality.", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data cleaning; functional dependency; SQL", } @Article{Jacox:2008:MSS, author = "Edwin H.
Jacox and Hanan Samet", title = "Metric space similarity joins", journal = j-TODS, volume = "33", number = "2", pages = "7:1--7:38", month = jun, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1366102.1366104", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 25 08:39:17 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Similarity join algorithms find pairs of objects that lie within a certain distance $ \epsilon $ of each other. Algorithms that are adapted from spatial join techniques are designed primarily for data in a vector space and often employ some form of a multidimensional index. For these algorithms, when the data lies in a metric space, the usual solution is to embed the data in vector space and then make use of a multidimensional index. Such an approach has a number of drawbacks when the data is high dimensional as we must eventually find the most discriminating dimensions, which is not trivial. In addition, although the maximum distance between objects increases with dimension, the ability to discriminate between objects in each dimension does not. These drawbacks are overcome via the introduction of a new method called {\em Quickjoin\/} that does not require a multidimensional index and instead adapts techniques used in distance-based indexing for use in a method that is conceptually similar to the Quicksort algorithm. A formal analysis is provided of the Quickjoin method. Experiments show that the Quickjoin method significantly outperforms two existing techniques.", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "distance-based indexing; external memory algorithms; nearest neighbor queries; range queries; ranking; similarity join", } @Article{He:2008:COD, author = "Bingsheng He and Qiong Luo", title = "Cache-oblivious databases: {Limitations} and opportunities", journal = j-TODS, volume = "33", number = "2", pages = "8:1--8:42", month = jun, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1366102.1366105", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 25 08:39:17 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Cache-oblivious techniques, proposed in the theory community, have optimal asymptotic bounds on the amount of data transferred between any two adjacent levels of an arbitrary memory hierarchy. Moreover, this optimal performance is achieved without any hardware platform specific tuning. These properties are highly attractive to autonomous databases, especially because the hardware architectures are becoming increasingly complex and diverse.\par In this article, we present our design, implementation, and evaluation of the first cache-oblivious in-memory query processor, EaseDB. Moreover, we discuss the inherent limitations of the cache-oblivious approach as well as the opportunities given by the upcoming hardware architectures. Specifically, a cache-oblivious technique usually requires sophisticated algorithm design to achieve a comparable performance to its cache-conscious counterpart. Nevertheless, this development-time effort is compensated by the automaticity of performance achievement and the reduced ownership cost. 
Furthermore, this automaticity enables cache-oblivious techniques to outperform their cache-conscious counterparts in multi-threading processors.", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "cache-conscious; cache-oblivious; chip multiprocessors; data caches; simultaneous multithreading", } @Article{Cormode:2008:ACQ, author = "Graham Cormode and Minos Garofalakis", title = "Approximate continuous querying over distributed streams", journal = j-TODS, volume = "33", number = "2", pages = "9:1--9:??", month = jun, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1366102.1366106", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 25 08:39:17 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "While traditional database systems optimize for performance on one-shot query processing, emerging large-scale monitoring applications require continuous tracking of complex data-analysis queries over collections of physically distributed streams. Thus, effective solutions have to be simultaneously space/time efficient (at each remote monitor site), communication efficient (across the underlying communication network), and provide continuous, guaranteed-quality approximate query answers. In this paper, we propose novel algorithmic solutions for the problem of continuously tracking a broad class of complex aggregate queries in such a distributed-streams setting. Our tracking schemes maintain approximate query answers with provable error guarantees, while simultaneously optimizing the storage space and processing time at each remote site, and the communication cost across the network. 
In a nutshell, our algorithms rely on tracking general-purpose randomized sketch summaries of local streams at remote sites along with concise prediction models of local site behavior in order to produce highly communication- and space/time-efficient solutions. The end result is a powerful approximate query tracking framework that readily incorporates several complex analysis queries (including distributed join and multi-join aggregates, and approximate wavelet representations), thus giving the first known low-overhead tracking solution for such queries in the distributed-streams model. Experiments with real data validate our approach, revealing significant savings over naive solutions as well as our analytical worst-case guarantees.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "approximate query processing; continuous distributed monitoring; data stream algorithms; data synopses", } @Article{Eiter:2008:RLQ, author = "Thomas Eiter and Michael Fink and Gianluigi Greco and Domenico Lembo", title = "Repair localization for query answering from inconsistent databases", journal = j-TODS, volume = "33", number = "2", pages = "10:1--10:??", month = jun, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1366102.1366107", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 25 08:39:17 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Query answering from inconsistent databases amounts to finding ``meaningful'' answers to queries posed over database instances that do not satisfy integrity constraints specified over their schema. 
A declarative approach to this problem relies on the notion of repair, that is, a database that satisfies integrity constraints and is obtained from the original inconsistent database by ``minimally'' adding and/or deleting tuples. Consistent answers to a user query are those answers that are in the evaluation of the query over each repair. Motivated by the fact that computing consistent answers from inconsistent databases is in general intractable, the present paper investigates techniques that allow to localize the difficult part of the computation on a small fragment of the database at hand, called ``affected'' part. Based on a number of localization results, an approach to query answering from inconsistent data is presented, in which the query is evaluated over each of the repairs of the affected part only, augmented with the part that is not affected. Single query results are then suitably recombined. For some relevant settings, techniques are also discussed to factorize repairs into components that can be processed independently of one another, thereby guaranteeing exponential gain w.r.t. the basic approach, which is not based on localization. The effectiveness of the results is demonstrated for consistent query answering over expressive schemas, based on logic programming specifications as proposed in the literature.", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "consistent query answering; data integration; database repairs; inconsistency management in databases; logic programming; stable models", } @Article{Fagin:2008:QIS, author = "Ronald Fagin and Phokion G. 
Kolaitis and Lucian Popa and Wang-Chiew Tan", title = "Quasi-inverses of schema mappings", journal = j-TODS, volume = "33", number = "2", pages = "11:1--11:??", month = jun, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1366102.1366108", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 25 08:39:17 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Schema mappings are high-level specifications that describe the relationship between two database schemas. Two operators on schema mappings, namely the composition operator and the inverse operator, are regarded as especially important. Progress on the study of the inverse operator was not made until very recently, as even finding the exact semantics of this operator turned out to be a fairly delicate task. Furthermore, this notion is rather restrictive, since it is rare that a schema mapping possesses an inverse.\par In this article, we introduce and study the notion of a quasi-inverse of a schema mapping. This notion is a principled relaxation of the notion of an inverse of a schema mapping; intuitively, it is obtained from the notion of an inverse by not differentiating between instances that are equivalent for data-exchange purposes. For schema mappings specified by source-to-target tuple-generating dependencies (s-t tgds), we give a necessary and sufficient combinatorial condition for the existence of a quasi-inverse, and then use this condition to obtain both positive and negative results about the existence of quasi-inverses. In particular, we show that every LAV (local-as-view) schema mapping has a quasi-inverse, but that there are schema mappings specified by full s-t tgds that have no quasi-inverse. After this, we study the language needed to express quasi-inverses of schema mappings specified by s-t tgds, and we obtain a complete characterization. 
We also characterize the language needed to express inverses of schema mappings, and thereby solve a problem left open in the earlier study of the inverse operator. Finally, we show that quasi-inverses can be used in many cases to recover the data that was exported by the original schema mapping when performing data exchange.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "chase; data exchange; data integration; dependencies; inverse; metadata model management; quasi-inverse; schema mapping", } @Article{Zhang:2008:CTA, author = "Donghui Zhang and Alexander Markowetz and Vassilis J. Tsotras and Dimitrios Gunopulos and Bernhard Seeger", title = "On computing temporal aggregates with range predicates", journal = j-TODS, volume = "33", number = "2", pages = "12:1--12:??", month = jun, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1366102.1366109", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jun 25 08:39:17 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Computing temporal aggregates is an important but costly operation for applications that maintain time-evolving data (data warehouses, temporal databases, etc.) Due to the large volume of such data, performance improvements for temporal aggregate queries are critical. Previous approaches have aggregate predicates that involve only the time dimension. In this article we examine techniques to compute temporal aggregates that include key-range predicates as well ({\em range-temporal aggregates\/}). In particular we concentrate on the SUM aggregate, while COUNT is a special case. To handle arbitrary key ranges, previous methods would need to keep a separate index for every possible key range. 
We propose an approach based on a new index structure called the {\em Multiversion SB-Tree}, which incorporates features from both the SB-Tree and the Multiversion B+-tree, to handle arbitrary key-range temporal aggregate queries. We analyze the performance of our approach and present experimental results that show its efficiency. Furthermore, we address a novel and practical variation called {\em functional\/} range-temporal aggregates. Here, the value of any record is a function over time. The meaning of aggregates is altered such that the contribution of a record to the aggregate result is proportional to the size of the intersection between the record's time interval and the query time interval. Both analytical and experimental results show the efficiency of our result.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "functional aggregates; indexing; range predicates; temporal aggregates", } @Article{Soliman:2008:PTR, author = "Mohamed A. Soliman and Ihab F. Ilyas and Kevin Chen-Chuan Chang", title = "Probabilistic top-$k$ and ranking-aggregate queries", journal = j-TODS, volume = "33", number = "3", pages = "13:1--13:??", month = aug, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1386118.1386119", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 29 14:05:10 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Ranking and aggregation queries are widely used in data exploration, data analysis, and decision-making scenarios. While most of the currently proposed ranking and aggregation techniques focus on deterministic data, several emerging applications involve data that is unclean or uncertain. 
Ranking and aggregating uncertain (probabilistic) data raises new challenges in query semantics and processing, making conventional methods inapplicable. Furthermore, uncertainty imposes probability as a new ranking dimension that does not exist in the traditional settings.\par In this article we introduce new probabilistic formulations for top-$k$ and ranking-aggregate queries in probabilistic databases. Our formulations are based on marriage of traditional top-$k$ semantics with possible worlds semantics. In the light of these formulations, we construct a generic processing framework supporting both query types, and leveraging existing query processing and indexing capabilities in current RDBMSs. The framework encapsulates a state space model and efficient search algorithms to compute query answers. Our proposed techniques minimize the number of accessed tuples and the size of materialized search space to compute query answers. Our experimental study shows the efficiency of our techniques under different data distributions with orders of magnitude improvement over na{\"\i}ve methods.", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "aggregation; probabilistic data; Query processing; ranking; top-k", } @Article{Ke:2008:CPM, author = "Yiping Ke and James Cheng and Wilfred Ng", title = "Correlated pattern mining in quantitative databases", journal = j-TODS, volume = "33", number = "3", pages = "14:1--14:??", month = aug, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1386118.1386120", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 29 14:05:10 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study mining correlations from quantitative databases and show that this is a more effective approach than mining associations to discover useful patterns. We propose the novel notion of {\em quantitative correlated pattern\/} (QCP), which is founded on two formal concepts, mutual information and all-confidence. We first devise a normalization on mutual information and apply it to the problem of QCP mining to capture the dependency between the attributes. We further adopt all-confidence as a quality measure to ensure, at a finer granularity, the dependency between the attributes with specific quantitative intervals. We also propose an effective supervised method that combines the consecutive intervals of the quantitative attributes based on mutual information, such that the interval-combining is guided by the dependency between the attributes. We develop an algorithm, {\em QCoMine}, to mine QCPs efficiently by utilizing normalized mutual information and all-confidence to perform bilevel pruning. We also identify the redundancy existing in the set of QCPs and propose effective techniques to eliminate the redundancy. 
Our extensive experiments on both real and synthetic datasets verify the efficiency of {\em QCoMine\/} and the quality of the QCPs. The experimental results also justify the effectiveness of our proposed techniques for redundancy elimination. To further demonstrate the usefulness and the quality of QCPs, we study an application of QCPs to classification. We demonstrate that the classifier built on the QCPs achieves higher classification accuracy than the state-of-the-art classifiers built on association rules.", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "correlated patterns; information-theoretic approach; mutual information; Quantitative databases", } @Article{Rusu:2008:SSJ, author = "Florin Rusu and Alin Dobra", title = "Sketches for size of join estimation", journal = j-TODS, volume = "33", number = "3", pages = "15:1--15:??", month = aug, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1386118.1386121", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 29 14:05:10 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Sketching techniques provide approximate answers to aggregate queries both for data-streaming and distributed computation. Small space summaries that have linearity properties are required for both types of applications. The prevalent method for analyzing sketches uses moment analysis and distribution-independent bounds based on moments. This method produces clean, easy to interpret, theoretical bounds that are especially useful for deriving asymptotic results. However, the theoretical bounds obscure fine details of the behavior of various sketches and they are mostly not indicative of which type of sketches should be used in practice. 
Moreover, no significant empirical comparison between various sketching techniques has been published, which makes the choice even harder. In this article we take a close look at the sketching techniques proposed in the literature from a statistical point of view with the goal of determining properties that indicate the actual behavior and producing tighter confidence bounds. Interestingly, the statistical analysis reveals that two of the techniques, Fast-AGMS and Count-Min, provide results that are in some cases orders of magnitude better than the corresponding theoretical predictions. We conduct an extensive empirical study that compares the different sketching techniques in order to corroborate the statistical analysis with the conclusions we draw from it. The study indicates the expected performance of various sketches, which is crucial if the techniques are to be used by practitioners. The overall conclusion of the study is that Fast-AGMS sketches are, for the full spectrum of problems, either the best, or close to the best, sketching technique. We apply the insights obtained from the statistical study and the experimental results to design effective algorithms for sketching interval data. We show how the two basic methods for sketching interval data, DMAP and fast range-summation, can be improved significantly with respect to the update time without a significant loss in accuracy. The gain in update time can be as large as two orders of magnitude, thus making the improved methods practical. The empirical study suggests that DMAP is preferable when update time is the critical requirement and fast range-summation is desirable for better accuracy.", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "AGMS sketches; Count-Min sketches; DMAP; fast range-summation; Fast-AGMS sketches; Fast-Count sketches; Size of join estimation", } @Article{Xu:2008:CBS, author = "Fei Xu and Christopher Jermaine and Alin Dobra", title = "Confidence bounds for sampling-based group by estimates", journal = j-TODS, volume = "33", number = "3", pages = "16:1--16:??", month = aug, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1386118.1386122", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 29 14:05:10 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Sampling is now a very important data management tool, to such an extent that an interface for database sampling is included in the latest SQL standard. In this article we reconsider in depth what at first may seem like a very simple problem --- computing the error of a sampling-based guess for the answer to a GROUP BY query over a multitable join. The difficulty when sampling for the answer to such a query is that the same sample will be used to guess the result of the query for each group, which induces correlations among the estimates. Thus, from a statistical point-of-view it is very problematic and even dangerous to use traditional methods such as confidence intervals for communicating estimate accuracy to the user. We explore ways to address this problem, and pay particular attention to the computational aspects of computing ``safe'' confidence intervals.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Approximate query processing; multiple hypothesis testing; sampling", } @Article{LeFevre:2008:WAA, author = "Kristen LeFevre and David J. DeWitt and Raghu Ramakrishnan", title = "Workload-aware anonymization techniques for large-scale datasets", journal = j-TODS, volume = "33", number = "3", pages = "17:1--17:??", month = aug, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1386118.1386123", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 29 14:05:10 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Protecting individual privacy is an important problem in microdata distribution and publishing. Anonymization algorithms typically aim to satisfy certain privacy definitions with minimal impact on the quality of the resulting data. While much of the previous literature has measured quality through simple one-size-fits-all measures, we argue that quality is best judged with respect to the workload for which the data will ultimately be used.\par This article provides a suite of anonymization algorithms that incorporate a target class of workloads, consisting of one or more data mining tasks as well as selection predicates. An extensive empirical evaluation indicates that this approach is often more effective than previous techniques. In addition, we consider the problem of scalability. The article describes two extensions that allow us to scale the anonymization algorithms to datasets much larger than main memory. The first extension is based on ideas from scalable decision trees, and the second is based on sampling. A thorough performance evaluation indicates that these techniques are viable in practice.", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "anonymity; data mining; Databases; performance; privacy; scalability", } @Article{Karras:2008:HSO, author = "Panagiotis Karras and Nikos Mamoulis", title = "Hierarchical synopses with optimal error guarantees", journal = j-TODS, volume = "33", number = "3", pages = "18:1--18:??", month = aug, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1386118.1386124", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 29 14:05:10 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Hierarchical synopsis structures offer a viable alternative in terms of efficiency and flexibility in relation to traditional summarization techniques such as histograms. Previous research on such structures has mostly focused on a single model, based on the Haar wavelet decomposition. In previous work, we have introduced a more refined, wavelet-inspired hierarchical index structure for synopsis construction: the Haar$^+$ tree. The chief advantages of this structure are twofold. First, it achieves higher synopsis quality at the task of summarizing data sets with sharp discontinuities than state-of-the-art histogram and Haar wavelet techniques. Second, thanks to its search space delimitation capacity, Haar$^+$ synopsis construction operates in time {\em linear\/} in the size of the data set for {\em any\/} monotonic distributive error metric. Contemporaneous research has introduced another hierarchical synopsis structure, the compact hierarchical histogram (CHH). In this article, we elaborate on both these structures. First, we formally prove that the CHH, in its default binary-hierarchy form, is a simplified variant of a Haar$^+$ tree. 
We then focus on the summarization problem, with both these hierarchical synopsis structures, in which an error guarantee expressed by a {\em maximum-error\/} metric is required. We show that this problem is most efficiently solved through its dual, space-minimization counterpart, which can also achieve {\em optimal quality}. In this case, there is a benefit to be gained by specializing the algorithm for each structure; hence, our algorithm for optimal-quality maximum-error CHH requires {\em low polynomial\/} time; on the other hand, optimal-quality Haar$^+$ synopses for maximum-error metrics are constructed in exponential time; hence, we also develop a low-polynomial-time approximation scheme for the maximum-error Haar$^+$ case. Furthermore, we extend our approach for both general-error and maximum-error Haar$^+$ synopses to arbitrary dimensionality. In our experimental study, (i) we confirm the theoretically expected superiority of Haar$^+$ synopses over Haar wavelet methods in both construction time and achieved quality for representative error metrics; (ii) we demonstrate that Haar$^+$ synopses are also constructed faster than optimal plain histograms, and, moreover, achieve higher synopsis quality with highly discontinuous data sets; such an advantage of a hierarchical synopsis structure over a histogram had been intuitively expressed, but never experimentally verified; and (iii) we show that Haar$^+$ synopsis quality supersedes that of a CHH.", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "approximate query processing; data synopses; Summarization", } @Article{Lester:2008:EOI, author = "Nicholas Lester and Alistair Moffat and Justin Zobel", title = "Efficient online index construction for text databases", journal = j-TODS, volume = "33", number = "3", pages = "19:1--19:??", month = aug, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1386118.1386125", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 29 14:05:10 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Inverted index structures are a core element of current text retrieval systems. They can be constructed quickly using offline approaches, in which one or more passes are made over a static set of input data, and, at the completion of the process, an index is available for querying. However, there are search environments in which even a small delay in timeliness cannot be tolerated, and the index must always be queryable and up to date. Here we describe and analyze a {\em geometric partitioning\/} mechanism for online index construction that provides a range of tradeoffs between costs, and can be adapted to different balances of insertion and querying operations. Detailed experimental results are provided that show the extent of these tradeoffs, and that these new methods can yield substantial savings in online indexing costs.", acknowledgement = ack-nhfb, articleno = "19", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Index construction; index update; search engines; text indexing", } @Article{Ozsoyoglu:2008:FTS, author = "Z. 
Meral {\"O}zsoyo{\u{g}}lu", title = "Foreword to {TODS SIGMOD\slash PODS\slash ICDT 2007} special issue", journal = j-TODS, volume = "33", number = "4", pages = "20:1--20:??", month = nov, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1412331.1412332", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 23 11:45:08 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "20", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ooi:2008:IAS, author = "Beng Chin Ooi", title = "Introduction to {ACM SIGMOD 2007} special section", journal = j-TODS, volume = "33", number = "4", pages = "21:1--21:??", month = nov, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1412331.1412333", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 23 11:45:08 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "21", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Melnik:2008:CMB, author = "Sergey Melnik and Atul Adya and Philip A. 
Bernstein", title = "Compiling mappings to bridge applications and databases", journal = j-TODS, volume = "33", number = "4", pages = "22:1--22:??", month = nov, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1412331.1412334", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 23 11:45:08 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Translating data and data access operations between applications and databases is a longstanding data management problem. We present a novel approach to this problem, in which the relationship between the application data and the persistent storage is specified using a declarative mapping, which is compiled into bidirectional views that drive the data transformation engine. Expressing the application model as a view on the database is used to answer queries, while expressing the database schema as a view on the application model allows us to leverage view maintenance algorithms for update translation. This approach has been implemented in a commercial product. It enables developers to interact with a relational database via a conceptual schema and an object-oriented programming surface. We outline the implemented system and focus on the challenges of mapping compilation, which include rewriting queries under constraints and supporting nonrelational constructs.", acknowledgement = ack-nhfb, articleno = "22", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Mapping; query rewriting; updateable views", } @Article{Jermaine:2008:SAQ, author = "Chris Jermaine and Subramanian Arumugam and Abhijit Pol and Alin Dobra", title = "Scalable approximate query processing with the {DBO} engine", journal = j-TODS, volume = "33", number = "4", pages = "23:1--23:??", month = nov, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1412331.1412335", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 23 11:45:08 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article describes query processing in the DBO database system. Like other database systems designed for ad hoc analytic processing, DBO is able to compute the exact answers to queries over a large relational database in a scalable fashion. Unlike any other system designed for analytic processing, DBO can constantly maintain a guess as to the final answer to an aggregate query throughout execution, along with statistically meaningful bounds for the guess's accuracy. As DBO gathers more and more information, the guess gets more and more accurate, until it is 100\% accurate as the query is completed. This allows users to stop the execution as soon as they are happy with the query accuracy, and thus encourages exploratory data analysis.", acknowledgement = ack-nhfb, articleno = "23", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Online aggregation; randomized algorithms; sampling", } @Article{Libkin:2008:IPS, author = "Leonid Libkin", title = "Introduction to the {PODS 2007} special section", journal = j-TODS, volume = "33", number = "4", pages = "24:1--24:??", month = nov, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1412331.1412336", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 23 11:45:08 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "24", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fan:2008:ECX, author = "Wenfei Fan and Floris Geerts and Frank Neven", title = "Expressiveness and complexity of {XML} publishing transducers", journal = j-TODS, volume = "33", number = "4", pages = "25:1--25:??", month = nov, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1412331.1412337", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 23 11:45:08 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A number of languages have been developed for specifying XML publishing, that is, transformations of relational data into XML trees. These languages generally describe the behaviors of a middleware controller that builds an output tree iteratively, issuing queries to a relational source and expanding the tree with the query results at each step. To study the complexity and expressive power of XML publishing languages, this article proposes a notion of {\em publishing transducers}, which generate XML trees from relational data. 
We study a variety of publishing transducers based on what relational queries a transducer can issue, what temporary stores a transducer can use during tree generation, and whether or not some tree nodes are allowed to be virtual, that is, excluded from the output tree. We first show how existing XML publishing languages can be characterized by such transducers, and thus provide a synergy between theory and practice. We then study the membership, emptiness, and equivalence problems for various classes of transducers. We establish lower and upper bounds, all matching, ranging from PTIME to undecidable. Finally, we investigate the expressive power of these transducers and existing languages. We show that when treated as relational query languages, different classes of transducers capture either complexity classes (e.g., PSPACE) or fragments of datalog (e.g., linear datalog). For tree generation, we establish connections between publishing transducers and logical transductions, among other things.", acknowledgement = ack-nhfb, articleno = "25", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "complexity; data exchange; expressiveness; transducer; XML publishing", } @Article{Jayram:2008:ESA, author = "T. S. Jayram and Andrew McGregor and S. Muthukrishnan and Erik Vee", title = "Estimating statistical aggregates on probabilistic data streams", journal = j-TODS, volume = "33", number = "4", pages = "26:1--26:??", month = nov, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1412331.1412338", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 23 11:45:08 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The probabilistic stream model was introduced by Jayram et al. [2007]. 
It is a generalization of the data stream model that is suited to handling {\em probabilistic\/} data, where each item of the stream represents a probability distribution over a set of possible events. Therefore, a probabilistic stream determines a distribution over a potentially exponential number of classical {\em deterministic\/} streams, where each item is deterministically one of the domain values.\par We present algorithms for computing commonly used aggregates on a probabilistic stream. We present the first one pass streaming algorithms for estimating the expected mean of a probabilistic stream. Next, we consider the problem of estimating frequency moments for probabilistic data. We propose a general approach to obtain unbiased estimators working over probabilistic data by utilizing unbiased estimators designed for standard streams. Applying this approach, we extend a classical data stream algorithm to obtain a one-pass algorithm for estimating $ F_2 $, the second frequency moment. We present the first known streaming algorithms for estimating $ F_0 $, the number of distinct items on probabilistic streams. Our work also gives an efficient one-pass algorithm for estimating the median, and a two-pass algorithm for estimating the range.", acknowledgement = ack-nhfb, articleno = "26", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "frequency moments; mean; median; OLAP; Probabilistic streams", } @Article{Schwentick:2008:IIS, author = "Thomas Schwentick and Dan Suciu", title = "Introduction to {ICDT 2007} special section", journal = j-TODS, volume = "33", number = "4", pages = "27:1--27:??", month = nov, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1412331.1412339", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 23 11:45:08 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "27", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Buneman:2008:EIP, author = "Peter Buneman and James Cheney and Stijn Vansummeren", title = "On the expressiveness of implicit provenance in query and update languages", journal = j-TODS, volume = "33", number = "4", pages = "28:1--28:??", month = nov, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1412331.1412340", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 23 11:45:08 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Information describing the origin of data, generally referred to as {\em provenance}, is important in scientific and curated databases where it is the basis for the trust one puts in their contents. 
Since such databases are constructed using operations of both query and update languages, it is of paramount importance to describe the effect of these languages on provenance.\par In this article we study provenance for query and update languages that are closely related to SQL, and compare two ways in which they can manipulate provenance so that elements of the input are rearranged to elements of the output: {\em implicit provenance}, where a query or update only provides the rearranged output, and provenance is provided implicitly by a default provenance semantics; and {\em explicit provenance}, where a query or update provides both the output and the description of the provenance of each component of the output. Although explicit provenance is in general more expressive, we show that the classes of implicit provenance operations expressible by query and update languages correspond to natural semantic subclasses of the explicit provenance queries.\par One of the consequences of this study is that provenance separates the expressive power of query and update languages. The model is also relevant to annotation propagation schemes in which annotations on the input to a query or update have to be transferred to the output or vice versa.", acknowledgement = ack-nhfb, articleno = "28", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "conservativity; nested relational calculus; nested update language; Provenance", } @Article{Ghelli:2008:CAX, author = "Giorgio Ghelli and Kristoffer Rose and J{\'e}r{\^o}me Sim{\'e}on", title = "Commutativity analysis for {XML} updates", journal = j-TODS, volume = "33", number = "4", pages = "29:1--29:??", month = nov, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1412331.1412341", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 23 11:45:08 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "An effective approach to support XML updates is to use XQuery extended with update operations. This approach results in very expressive languages which are convenient for users but are difficult to optimize or reason about. A crucial question underlying many static analysis problems for such languages, from optimization to view maintenance, is whether two expressions commute. Unfortunately, commutativity is undecidable for most existing XML update languages. In this article, we propose a conservative analysis for an expressive XML update language that can be used to determine commutativity. The approach relies on a form of path analysis that computes upper bounds for the nodes that are accessed or modified in a given expression. Our main result is a theorem that can be used to identify commuting expressions. We illustrate how the technique applies to concrete examples of query optimization in the presence of updates.", acknowledgement = ack-nhfb, articleno = "29", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "commutativity; optimization; updates; XML; XQuery", } @Article{Pavlou:2008:FAD, author = "Kyriacos E. Pavlou and Richard T. Snodgrass", title = "Forensic analysis of database tampering", journal = j-TODS, volume = "33", number = "4", pages = "30:1--30:??", month = nov, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1412331.1412342", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 23 11:45:08 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Regulations and societal expectations have recently expressed the need to mediate access to valuable databases, even by insiders. One approach is tamper detection via cryptographic hashing. This article shows how to determine when the tampering occurred, what data was tampered with, and perhaps, ultimately, who did the tampering, via forensic analysis. We present four successively more sophisticated forensic analysis algorithms: the Monochromatic, RGBY, Tiled Bitmap, and a3D algorithms, and characterize their ``forensic cost'' under worst-case, best-case, and average-case assumptions on the distribution of corruption sites. A lower bound on forensic cost is derived, with RGBY and a3D being shown optimal for a large number of corruptions. We also provide validated cost formul{\ae} for these algorithms and recommendations for the circumstances in which each algorithm is indicated.", acknowledgement = ack-nhfb, articleno = "30", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "a3D algorithm; compliant records; forensic analysis algorithm; forensic cost; Monochromatic algorithm; Polychromatic algorithm; RGBY algorithm; Tiled Bitmap algorithm", } @Article{Bartolini:2008:ESB, author = "Ilaria Bartolini and Paolo Ciaccia and Marco Patella", title = "Efficient sort-based skyline evaluation", journal = j-TODS, volume = "33", number = "4", pages = "31:1--31:??", month = nov, year = "2008", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1412331.1412343", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 23 11:45:08 MST 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Skyline queries compute the set of Pareto-optimal tuples in a relation, that is, those tuples that are not {\em dominated\/} by any other tuple in the same relation. Although several algorithms have been proposed for efficiently evaluating skyline queries, they either necessitate the relation to have been indexed or have to perform the dominance tests on {\em all\/} the tuples in order to determine the result. In this article we introduce salsa, a novel skyline algorithm that exploits the idea of presorting the input data so as to effectively {\em limit\/} the number of tuples to be read and compared. This makes salsa also attractive when skyline queries are executed on top of systems that do not understand skyline semantics, or when the skyline logic runs on clients with limited power and/or bandwidth. We prove that, if one considers symmetric sorting functions, the number of tuples to be read is minimized by sorting data according to a ``minimum coordinate,'' minC, criterion, and that performance can be further improved if data distribution is known and an asymmetric sorting function is used. 
Experimental results obtained on synthetic and real datasets show that salsa consistently outperforms state-of-the-art sequential skyline algorithms and that its performance can be accurately predicted.", acknowledgement = ack-nhfb, articleno = "31", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Monotone functions; Skyline query", } @Article{Mishra:2009:DQM, author = "Chaitanya Mishra and Nick Koudas", title = "The design of a query monitoring system", journal = j-TODS, volume = "34", number = "1", pages = "1:1--1:??", month = apr, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1508857.1508858", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 2 12:22:33 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Query monitoring refers to the problem of observing and predicting various parameters related to the execution of a query in a database system. In addition to being a useful tool for database users and administrators, it can also serve as an information collection service for resource allocation and adaptive query processing techniques. In this article, we present a query monitoring system from the ground up, describing various new techniques for query monitoring, their implementation inside a real database system, and a novel interface that presents the observed and predicted information in an accessible manner. To enable this system, we introduce several lightweight online techniques for progressively estimating and refining the cardinality of different relational operators using information collected at query execution time. These include binary and multiway joins as well as typical grouping operations and combinations thereof. 
We describe the various algorithms used to efficiently implement estimators and present the results of an evaluation of a prototype implementation of our framework in an open-source data management system. Our results demonstrate the feasibility and practical utility of the approach presented herein.", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "progress estimation; Query monitoring", } @Article{Cheng:2009:EQP, author = "James Cheng and Yiping Ke and Wilfred Ng", title = "Efficient query processing on graph databases", journal = j-TODS, volume = "34", number = "1", pages = "2:1--2:??", month = apr, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1508857.1508859", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 2 12:22:33 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study the problem of processing {\em subgraph queries\/} on a database that consists of a set of graphs. The answer to a subgraph query is the set of graphs in the database that are supergraphs of the query. In this article, we propose an efficient index, {\em FG*-index}, to solve this problem.\par The cost of processing a subgraph query using most existing indexes mainly consists of two parts: the {\em index probing\/} cost and the {\em candidate verification\/} cost. Index probing is to find the query in the index, or to find the graphs from which we can generate a candidate answer set for the query. Candidate verification is to test whether each graph in the candidate set is indeed a supergraph of the query. We design FG*-index to minimize these two costs as follows.\par FG*-index consists of three components: the {\em FG-index}, the {\em feature-index}, and the {\em FAQ-index}. 
First, the FG-index employs the concept of {\em Frequent subGraph\/} ({\em FG\/}) to allow the set of queries that are FGs to be answered without candidate verification. We call this set of queries {\em FG-queries}. We can enlarge the set of FG-queries so that more queries can be answered without candidate verification; however, a larger set of FG-queries implies a larger FG-index and hence the index probing cost also increases. We propose the feature-index to reduce the index probing cost. The feature-index uses features to filter false results that are matched in the FG-index, so that we can quickly find the truly matching graphs for a query. For processing non-FG-queries, we propose the FAQ-index, which is dynamically constructed from the set of {\em Frequently Asked non-FG-Queries\/} ({\em FAQs\/}). Using the FAQ-index, verification is not required for processing FAQs and only a small number of candidates need to be verified for processing non-FG-queries that are {\em not frequently asked}. Finally, a comprehensive set of experiments verifies that query processing using FG*-index is up to orders of magnitude more efficient than state-of-the-art indexes and it is also more scalable.", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "frequent subgraphs; Graph databases; graph indexing; graph query processing", } @Article{Spiegel:2009:TSA, author = "Joshua Spiegel and Neoklis Polyzotis", title = "{TuG} synopses for approximate query answering", journal = j-TODS, volume = "34", number = "1", pages = "3:1--3:??", month = apr, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1508857.1508860", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 2 12:22:33 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article introduces the Tuple Graph (TuG) synopses, a new class of data summaries that enable accurate approximate answers for complex relational queries. The proposed summarization framework adopts a ``semi-structured'' view of the relational database, modeling a relational data set as a graph of tuples and join queries as graph traversals, respectively. The key idea is to approximate the structure of the induced data graph in a concise synopsis, and to approximate the answer to a query by performing the corresponding traversal over the summarized graph. We detail the (TuG) synopsis model that is based on this novel approach, and we describe an efficient and scalable construction algorithm for building accurate (TuG) within a specific storage budget. We validate the performance of (TuG) with an extensive experimental study on real-life and synthetic datasets. Our results verify the effectiveness of (TuG) in generating accurate approximate answers for complex join queries, and demonstrate their benefits over existing summarization techniques.", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "approximate query processing; Data synopses; selectivity estimation", } @Article{Kramer:2009:SIC, author = "J{\"u}rgen Kr{\"a}mer and Bernhard Seeger", title = "Semantics and implementation of continuous sliding window queries over data streams", journal = j-TODS, volume = "34", number = "1", pages = "4:1--4:??", month = apr, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1508857.1508861", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 2 12:22:33 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In recent years the processing of continuous queries over potentially infinite data streams has attracted a lot of research attention. We observed that the majority of work addresses individual stream operations and system-related issues rather than the development of a general-purpose basis for stream processing systems. Furthermore, example continuous queries are often formulated in some declarative query language without specifying the underlying semantics precisely enough. To overcome these deficiencies, this article presents a consistent and powerful operator algebra for data streams which ensures that continuous queries have well-defined, deterministic results. In analogy to traditional database systems, we distinguish between a logical and a physical operator algebra. While the logical algebra specifies the semantics of the individual operators in a descriptive but concrete way over temporal multisets, the physical algebra provides efficient implementations in the form of stream-to-stream operators. 
By adapting and enhancing research from temporal databases to meet the challenging requirements in streaming applications, we are able to carry over the conventional transformation rules from relational databases to stream processing. For this reason, our approach not only makes it possible to express continuous queries with a sound semantics, but also provides a solid foundation for query optimization, one of the major research topics in the stream community. Since this article seamlessly explains the steps from query formulation to query execution, it outlines the innovative features and operational functionality implemented in our state-of-the-art stream processing infrastructure.", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "continuous queries; data streams; query optimization; Semantics", } @Article{Jain:2009:QAO, author = "Alpa Jain and Panagiotis G. Ipeirotis", title = "A quality-aware optimizer for information extraction", journal = j-TODS, volume = "34", number = "1", pages = "5:1--5:??", month = apr, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1508857.1508862", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 2 12:22:33 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A large amount of structured information is buried in unstructured text. Information extraction systems can extract structured relations from the documents and enable sophisticated, SQL-like queries over unstructured text. Information extraction systems are not perfect and their output has imperfect precision and recall (i.e., contains spurious tuples and misses good tuples). 
Typically, an extraction system has a set of parameters that can be used as ``knobs'' to tune the system to be either precision- or recall-oriented. Furthermore, the choice of documents processed by the extraction system also affects the quality of the extracted relation. So far, estimating the output quality of an information extraction task has been an ad hoc procedure, based mainly on heuristics. In this article, we show how to use Receiver Operating Characteristic (ROC) curves to estimate the extraction quality in a statistically robust way and show how to use ROC analysis to select the extraction parameters in a principled manner. Furthermore, we present analytic models that reveal how different document retrieval strategies affect the quality of the extracted relation. Finally, we present our maximum likelihood approach for estimating, on the fly, the parameters required by our analytic models to predict the runtime and the output quality of each execution plan. Our experimental evaluation demonstrates that our optimization approach predicts accurately the output quality and selects the fastest execution plan that satisfies the output quality restrictions.", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Information Extraction; ROC curves", } @Article{Considine:2009:RAA, author = "Jeffrey Considine and Marios Hadjieleftheriou and Feifei Li and John Byers and George Kollios", title = "Robust approximate aggregation in sensor data management systems", journal = j-TODS, volume = "34", number = "1", pages = "6:1--6:??", month = apr, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1508857.1508863", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 2 12:22:33 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In the emerging area of sensor-based systems, a significant challenge is to develop scalable, fault-tolerant methods to extract useful information from the data the sensors collect. An approach to this data management problem is the use of sensor database systems, which allow users to perform aggregation queries such as MIN, COUNT, and AVG on the readings of a sensor network. In addition, more advanced queries such as frequency counting and quantile estimation can be supported. Due to energy limitations in sensor-based networks, centralized data collection is generally impractical, so most systems use in-network aggregation to reduce network traffic. However, even these aggregation strategies remain bandwidth-intensive when combined with the fault-tolerant, multipath routing methods often used in these environments. To avoid this expense, we investigate the use of approximate in-network aggregation using small sketches. We present duplicate-insensitive sketching techniques that can be implemented efficiently on small sensor devices with limited hardware support and we analyze both their performance and accuracy. 
Finally, we present an experimental evaluation that validates the effectiveness of our methods.", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "aggregation; approximation algorithms; Sensor databases; sketches; synopses", } @Article{Angiulli:2009:DOP, author = "Fabrizio Angiulli and Fabio Fassetti and Luigi Palopoli", title = "Detecting outlying properties of exceptional objects", journal = j-TODS, volume = "34", number = "1", pages = "7:1--7:??", month = apr, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1508857.1508864", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 2 12:22:33 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Assume you are given a data population characterized by a certain number of attributes. Assume, moreover, you are provided with the information that one of the individuals in this data population is abnormal, but no reason whatsoever is given to you as to why this particular individual is to be considered abnormal. In several cases, you will be indeed interested in discovering such reasons. This article is precisely concerned with this problem of discovering sets of attributes that account for the (a priori stated) abnormality of an individual within a given dataset. A criterion is presented to measure the abnormality of combinations of attribute values featured by the given abnormal individual with respect to the reference population. In this respect, each subset of attributes is intended to somehow represent a ``property'' of individuals. We distinguish between global and local properties. Global properties are subsets of attributes explaining the given abnormality with respect to the entire data population. 
With local ones, instead, two subsets of attributes are singled out, where the former one justifies the abnormality within the data subpopulation selected using the values taken by the exceptional individual on those attributes included in the latter one. The problem of individuating abnormal properties with associated explanations is formally stated and analyzed. Such a formal characterization is then exploited in order to devise efficient algorithms for detecting both global and local forms of most abnormal properties. The experimental evidence, which is accounted for in the article, shows that the algorithms are both able to mine meaningful information and to accomplish the computational task by examining a negligible fraction of the search space.", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Data mining; knowledge discovery; outlier characterization", } @Article{Wong:2009:ABA, author = "Raymond Chi-Wing Wong and Ada Wai-Chee Fu and Ke Wang and Jian Pei", title = "Anonymization-based attacks in privacy-preserving data publishing", journal = j-TODS, volume = "34", number = "2", pages = "8:1--8:??", month = jun, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1538909.1538910", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 2 12:23:25 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Data publishing generates much concern over the protection of individual privacy. Recent studies consider cases where the adversary may possess different kinds of knowledge about the data. 
In this article, we show that knowledge of the mechanism or algorithm of anonymization for data publication can also lead to extra information that assists the adversary and jeopardizes individual privacy. In particular, all known mechanisms try to minimize information loss and such an attempt provides a loophole for attacks. We call such an attack a minimality attack. In this article, we introduce a model called $m$-confidentiality which deals with minimality attacks, and propose a feasible solution. Our experiments show that minimality attacks are practical concerns on real datasets and that our algorithm can prevent such attacks with very little overhead and information loss.", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data publishing; k-anonymity; l-diversity; minimality attack; Privacy preservation", } @Article{Ghinita:2009:FED, author = "Gabriel Ghinita and Panagiotis Karras and Panos Kalnis and Nikos Mamoulis", title = "A framework for efficient data anonymization under privacy and accuracy constraints", journal = j-TODS, volume = "34", number = "2", pages = "9:1--9:??", month = jun, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1538909.1538911", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 2 12:23:25 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Recent research studied the problem of publishing microdata without revealing sensitive information, leading to the privacy-preserving paradigms of $k$-anonymity and $l$-diversity. $k$-anonymity protects against the identification of an individual's record. $l$-diversity, in addition, safeguards against the association of an individual with specific sensitive information. 
However, existing approaches suffer from at least one of the following drawbacks: (i) $l$-diversification is solved by techniques developed for the simpler $k$-anonymization problem, causing unnecessary information loss. (ii) The anonymization process is inefficient in terms of computational and I/O cost. (iii) Previous research focused exclusively on the privacy-constrained problem and ignored the equally important accuracy-constrained (or dual) anonymization problem.\par In this article, we propose a framework for efficient anonymization of microdata that addresses these deficiencies. First, we focus on one-dimensional (i.e., single-attribute) quasi-identifiers, and study the properties of optimal solutions under the $k$-anonymity and $l$-diversity models for the privacy-constrained (i.e., direct) and the accuracy-constrained (i.e., dual) anonymization problems. Guided by these properties, we develop efficient heuristics to solve the one-dimensional problems in linear time. Finally, we generalize our solutions to multidimensional quasi-identifiers using space-mapping techniques. Extensive experimental evaluation shows that our techniques clearly outperform the existing approaches in terms of execution time and information loss.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "anonymity; Privacy", } @Article{Hartmann:2009:ERA, author = "Sven Hartmann and Sebastian Link", title = "Efficient reasoning about a robust {XML} key fragment", journal = j-TODS, volume = "34", number = "2", pages = "10:1--10:??", month = jun, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1538909.1538912", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 2 12:23:25 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We review key constraints in the context of XML as introduced by Buneman et al. We demonstrate that:\par (1) one of the proposed inference rules is not sound in general, and\par (2) the inference rules are incomplete for XML key implication, even for nonempty sets of simple key paths.\par This shows, in contrast to earlier statements, that the axiomatizability of XML keys is still open, and efficient algorithms for deciding their implication still need to be developed. Solutions to these problems have a wide range of applications including consistency validation, XML schema design, data exchange and integration, consistent query answering, XML query optimization and rewriting, and indexing.\par In this article, we investigate the axiomatizability and implication problem for XML keys with nonempty sets of simple key paths. In particular, we propose a set of inference rules that is indeed sound and complete for the implication of such XML keys. We demonstrate that this fragment is robust by showing the duality of XML key implication to the reachability problem of fixed nodes in a suitable digraph. This enables us to develop a quadratic-time algorithm for deciding implication, and shows that reasoning about this XML key fragment is practically efficient. 
Therefore, XML applications can be unlocked effectively since they benefit not only from those XML keys specified explicitly by the data designer but also from those that are specified implicitly.", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "axiomatization; implication; reachability; XML data; XML key", } @Article{Lin:2009:SII, author = "Yi Lin and Bettina Kemme and Ricardo Jim{\'e}nez-Peris and Marta Pati{\~n}o-Mart{\'\i}nez and Jos{\'e} Enrique Armend{\'a}riz-I{\~n}igo", title = "Snapshot isolation and integrity constraints in replicated databases", journal = j-TODS, volume = "34", number = "2", pages = "11:1--11:??", month = jun, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1538909.1538913", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 2 12:23:25 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Database replication is widely used for fault tolerance and performance. However, it requires replica control to keep data copies consistent despite updates. The traditional correctness criterion for the concurrent execution of transactions in a replicated database is 1-copy-serializability. It is based on serializability, the strongest isolation level in a nonreplicated system. In recent years, however, Snapshot Isolation (SI), a slightly weaker isolation level, has become popular in commercial database systems. There exist already several replica control protocols that provide SI in a replicated system. However, most of the correctness reasoning for these protocols has been rather informal. Additionally, most of the work so far ignores the issue of integrity constraints. 
In this article, we provide a formal definition of 1-copy-SI using and extending a well-established definition of SI in a nonreplicated system. Our definition considers integrity constraints in a way that conforms to the way integrity constraints are handled in commercial systems. We discuss a set of necessary and sufficient conditions for a replicated history to be producible under 1-copy-SI. This makes our formalism a convenient tool to prove the correctness of replica control algorithms.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "integrity constraints; Replication; snapshot isolation", } @Article{Su:2009:OOA, author = "Weifeng Su and Jiying Wang and Frederick H. Lochovsky", title = "{ODE}: Ontology-assisted data extraction", journal = j-TODS, volume = "34", number = "2", pages = "12:1--12:??", month = jun, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1538909.1538914", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 2 12:23:25 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Online databases respond to a user query with result records encoded in HTML files. Data extraction, which is important for many applications, extracts the records from the HTML files automatically. We present a novel data extraction method, ODE (Ontology-assisted Data Extraction), which automatically extracts the query result records from the HTML pages. ODE first constructs an ontology for a domain according to information matching between the query interfaces and query result pages from different Web sites within the same domain. 
Then, the constructed domain ontology is used during data extraction to identify the query result section in a query result page and to align and label the data values in the extracted records. The ontology-assisted data extraction method is fully automatic and overcomes many of the deficiencies of current automatic data extraction methods. Experimental results show that ODE is extremely accurate for identifying the query result section in an HTML page, segmenting the query result section into query result records, and aligning and labeling the data values in the query result records.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data value alignment; Domain ontology; label assignment", } @Article{Agarwal:2009:ISS, author = "Pankaj K. Agarwal and Junyi Xie and Jun Yang and Hai Yu", title = "Input-sensitive scalable continuous join query processing", journal = j-TODS, volume = "34", number = "3", pages = "13:1--13:??", month = aug, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1567274.1567275", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Aug 31 16:11:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article considers the problem of scalably processing a large number of continuous queries. Our approach, consisting of novel data structures and algorithms and a flexible processing framework, advances the state-of-the-art in several ways. First, our approach is query sensitive in the sense that it exploits potential overlaps in query predicates for efficient group processing. 
We partition the collection of continuous queries into groups based on the clustering patterns of the query predicates, and apply specialized processing strategies to heavily clustered groups (or {\em hotspots\/}). We show how to maintain the hotspots efficiently, and use them to scalably process continuous select-join, band-join, and window-join queries. Second, our approach is also data sensitive, in the sense that it makes cost-based decisions on how to process each incoming tuple based on its characteristics. Experiments demonstrate that our approach can improve the processing throughput by orders of magnitude.", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Continuous queries; data streams; event matching; publish/subscribe", } @Article{Sharifzadeh:2009:PSS, author = "Mehdi Sharifzadeh and Cyrus Shahabi and Leyla Kazemi", title = "Processing spatial skyline queries in both vector spaces and spatial network databases", journal = j-TODS, volume = "34", number = "3", pages = "14:1--14:??", month = aug, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1567274.1567276", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Aug 31 16:11:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In this article, we first introduce the concept of Spatial Skyline Queries (SSQ). Given a set of data points $P$ and a set of query points $Q$, each data point has a number of {\em derived spatial\/} attributes each of which is the point's distance to a query point. An SSQ retrieves those points of $P$ which are not dominated by any other point in $P$ considering their derived spatial attributes. 
The main difference with the regular skyline query is that this {\em spatial domination\/} depends on the location of the query points $Q$. SSQ has application in several domains such as emergency response and online maps. The main intuition and novelty behind our approaches is that we exploit the geometric properties of the SSQ problem space to avoid the exhaustive examination of all the point pairs in $P$ and $Q$. Consequently, we reduce the complexity of SSQ search from $ O(|P|^2 |Q|) $ to $ O(|S|^2 |C| + \sqrt {|P|}) $, where $ |S| $ and $ |C| $ are the solution size and the number of vertices of the convex hull of $Q$, respectively.\par Considering Euclidean distance, we propose two algorithms, $ B^2 S^2 $ and VS$^2$, for static query points and one algorithm, VCS$^2$, for streaming $Q$ whose points change location over time (e.g., are mobile). VCS$^2$ exploits the pattern of change in $Q$ to avoid unnecessary recomputation of the skyline and hence efficiently perform updates. We also propose two algorithms, SNS$^2$ and VSNS$^2$, that compute the spatial skyline with respect to the network distance in a spatial network database. Our extensive experiments using real-world datasets verify that both R-tree-based $ B^2 S^2 $ and Voronoi-based VS$^2$ outperform the best competitor approach in terms of both processing time and I/O cost. Furthermore, their output computed based on Euclidean distance is a good approximation of the spatial skyline in network space. For accurate computation of spatial skylines in network space, our experiments showed the superiority of VSNS$^2$ over SNS$^2$.", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "spatial databases; Spatial skyline; Voronoi diagrams", } @Article{Yi:2009:SSG, author = "Ke Yi and Feifei Li and Graham Cormode and Marios Hadjieleftheriou and George Kollios and Divesh Srivastava", title = "Small synopses for group-by query verification on outsourced data streams", journal = j-TODS, volume = "34", number = "3", pages = "15:1--15:??", month = aug, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1567274.1567277", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Aug 31 16:11:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Due to the overwhelming flow of information in many data stream applications, data outsourcing is a natural and effective paradigm for individual businesses to address the issue of scale. In the standard data outsourcing model, the data owner outsources streaming data to one or more third-party servers, which answer queries posed by a potentially large number of clients on the data owner's behalf. Data outsourcing intrinsically raises issues of trust, making outsourced query assurance on data streams a problem with important practical implications. Existing solutions proposed in this model all build upon cryptographic primitives such as signatures and collision-resistant hash functions, which only work for certain types of queries, for example, simple selection/aggregation queries.\par In this article, we consider another common type of queries, namely, ``GROUP BY, SUM'' queries, which previous techniques fail to support. 
Our new solutions are not based on cryptographic primitives, but instead use algebraic and probabilistic techniques to compute a small synopsis on the true query result, which is then communicated to the client so as to verify the correctness of the query result returned by the server. The synopsis uses a constant amount of space irrespective of the result size, has an extremely small probability of failure, and can be maintained using no extra space when the query result changes as elements stream by. We then generalize our synopsis to allow some tolerance on the number of erroneous groups, in order to support semantic load shedding on the server. When the number of erroneous groups is indeed tolerable, the synopsis can be strengthened so that we can locate and even correct these errors. Finally, we implement our techniques and perform an empirical evaluation using live network traffic.", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data streams; outsourcing; Synopses", } @Article{Perez:2009:SCS, author = "Jorge P{\'e}rez and Marcelo Arenas and Claudio Gutierrez", title = "Semantics and complexity of {SPARQL}", journal = j-TODS, volume = "34", number = "3", pages = "16:1--16:??", month = aug, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1567274.1567278", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Aug 31 16:11:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "SPARQL is the standard language for querying RDF data. In this article, we address systematically the formal study of the database aspects of SPARQL, concentrating in its graph pattern matching facility. 
We provide a compositional semantics for the core part of SPARQL, and study the complexity of the evaluation of several fragments of the language. Among other complexity results, we show that the evaluation of general SPARQL patterns is PSPACE-complete. We identify a large class of SPARQL patterns, defined by imposing a simple and natural syntactic restriction, where the query evaluation problem can be solved more efficiently. This restriction gives rise to the class of well-designed patterns. We show that the evaluation problem is coNP-complete for well-designed patterns. Moreover, we provide several rewriting rules for well-designed patterns whose application may have a considerable impact in the cost of evaluating SPARQL queries.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Complexity; query language; RDF; semantic Web; SPARQL", } @Article{Markowetz:2009:KSR, author = "Alexander Markowetz and Yin Yang and Dimitris Papadias", title = "Keyword search over relational tables and streams", journal = j-TODS, volume = "34", number = "3", pages = "17:1--17:??", month = aug, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1567274.1567279", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Aug 31 16:11:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "{\em Relational Keyword Search\/} (R-KWS) provides an intuitive way to query relational data without requiring SQL, or knowledge of the underlying schema. In this article we describe a comprehensive framework for R-KWS covering snapshot queries on conventional tables and continuous queries on relational streams. 
Our contributions are summarized as follows: (i) We provide formal semantics, addressing the temporal validity and order of results, spanning uniformly over tables and streams; (ii) we investigate two general methodologies for query processing, {\em graph based\/} and {\em operator based}, that resolve several problems of previous approaches; and (iii) we develop a range of algorithms and optimizations covering both methodologies. We demonstrate the effectiveness of R-KWS, as well as the significant performance benefits of the proposed techniques, through extensive experiments with static and streaming datasets.", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "data graph; data streams; query processing; relational databases; Search", } @Article{Cohen:2009:ICP, author = "Sara Cohen and Benny Kimelfeld and Yehoshua Sagiv", title = "Incorporating constraints in probabilistic {XML}", journal = j-TODS, volume = "34", number = "3", pages = "18:1--18:??", month = aug, year = "2009", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1567274.1567280", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Aug 31 16:11:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Constraints are important, not only for maintaining data integrity, but also because they capture natural probabilistic dependencies among data items. A {\em probabilistic XML database\/} (PXDB) is the probability subspace comprising the instances of a {\em p-document\/} that satisfy a set of constraints. In contrast to existing models that can express probabilistic dependencies, it is shown that query evaluation is tractable in PXDBs. 
The problems of sampling and determining well-definedness (i.e., whether the aforesaid subspace is nonempty) are also tractable. Furthermore, queries and constraints can include the aggregate functions {\em count, max, min,\/} and {\em ratio.\/} Finally, this approach can be easily extended to allow a probabilistic interpretation of constraints.", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "constraints; Probabilistic databases; probabilistic XML; sampling probabilistic data", } @Article{Shasha:2009:FTS, author = "Dennis Shasha and Maurizio Lenzerini and Z. Meral {\"O}zsoyo{\u{g}}lu", title = "Foreword to {TODS SIGMOD\slash PODS 2008} special issue", journal = j-TODS, volume = "34", number = "4", pages = "19:1--19:??", month = dec, year = "2009", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:50 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "19", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cahill:2009:SIS, author = "Michael J. Cahill and Uwe R{\"o}hm and Alan D. Fekete", title = "Serializable isolation for snapshot databases", journal = j-TODS, volume = "34", number = "4", pages = "20:1--20:??", month = dec, year = "2009", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:50 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "20", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{He:2009:RQC, author = "Bingsheng He and Mian Lu and Ke Yang and Rui Fang and Naga K. Govindaraju and Qiong Luo and Pedro V. Sander", title = "Relational query coprocessing on graphics processors", journal = j-TODS, volume = "34", number = "4", pages = "21:1--21:??", month = dec, year = "2009", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:50 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "21", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Arenas:2009:RSM, author = "Marcelo Arenas and Jorge P{\'e}rez and Cristian Riveros", title = "The recovery of a schema mapping: {Bringing} exchanged data back", journal = j-TODS, volume = "34", number = "4", pages = "22:1--22:??", month = dec, year = "2009", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:50 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "22", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Abiteboul:2009:SAA, author = "Serge Abiteboul and Luc Segoufin and Victor Vianu", title = "Static analysis of active {XML} systems", journal = j-TODS, volume = "34", number = "4", pages = "23:1--23:??", month = dec, year = "2009", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:50 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "23", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Chow:2009:CQP, author = "Chi-Yin Chow and Mohamed F. Mokbel and Walid G. Aref", title = "{Casper*}: {Query} processing for location services without compromising privacy", journal = j-TODS, volume = "34", number = "4", pages = "24:1--24:??", month = dec, year = "2009", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:50 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "24", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Benedikt:2009:XRL, author = "Michael Benedikt and Christoph Koch", title = "From {XQuery} to relational logics", journal = j-TODS, volume = "34", number = "4", pages = "25:1--25:??", month = dec, year = "2009", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:50 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "25", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ghanem:2010:SVD, author = "Thanaa M. Ghanem and Ahmed K. Elmagarmid and Per-{\AA}ke Larson and Walid G. Aref", title = "Supporting views in data stream management systems", journal = j-TODS, volume = "35", number = "1", pages = "1:1--1:??", month = feb, year = "2010", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:52 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Wu:2010:AML, author = "Kesheng Wu and Arie Shoshani and Kurt Stockinger", title = "Analyses of multi-level and multi-component compressed bitmap indexes", journal = j-TODS, volume = "35", number = "1", pages = "2:1--2:??", month = feb, year = "2010", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:52 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Lian:2010:RSS, author = "Xiang Lian and Lei Chen", title = "Reverse skyline search in uncertain databases", journal = j-TODS, volume = "35", number = "1", pages = "3:1--3:??", month = feb, year = "2010", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:52 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Augsten:2010:GDB, author = "Nikolaus Augsten and Michael B{\"o}hlen and Johann Gamper", title = "The $ p q $-gram distance between ordered labeled trees", journal = j-TODS, volume = "35", number = "1", pages = "4:1--4:??", month = feb, year = "2010", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:52 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kolahi:2010:ITA, author = "Solmaz Kolahi and Leonid Libkin", title = "An information-theoretic analysis of worst-case redundancy in database design", journal = j-TODS, volume = "35", number = "1", pages = "5:1--5:??", month = feb, year = "2010", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:52 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Schnaitter:2010:OAE, author = "Karl Schnaitter and Neoklis Polyzotis", title = "Optimal algorithms for evaluating rank joins in database systems", journal = j-TODS, volume = "35", number = "1", pages = "6:1--6:??", month = feb, year = "2010", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:52 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Soror:2010:AVM, author = "Ahmed A. Soror and Umar Farooq Minhas and Ashraf Aboulnaga and Kenneth Salem and Peter Kokosielis and Sunil Kamath", title = "Automatic virtual machine configuration for database workloads", journal = j-TODS, volume = "35", number = "1", pages = "7:1--7:??", month = feb, year = "2010", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 15 12:22:52 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Xiao:2010:TAT, author = "Xiaokui Xiao and Yufei Tao and Nick Koudas", title = "Transparent anonymization: {Thwarting} adversaries who know the algorithm", journal = j-TODS, volume = "35", number = "2", pages = "8:1--8:??", month = apr, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1735886.1735887", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 28 13:44:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Numerous generalization techniques have been proposed for privacy-preserving data publishing. Most existing techniques, however, implicitly assume that the adversary knows little about the anonymization algorithm adopted by the data publisher. Consequently, they cannot guard against privacy attacks that exploit various characteristics of the anonymization mechanism. This article provides a practical solution to this problem. First, we propose an analytical model for evaluating disclosure risks, when an adversary knows {\em everything\/} in the anonymization process, except the sensitive values. Based on this model, we develop a privacy principle, {\em transparent $l$-diversity}, which ensures privacy protection against such powerful adversaries. We identify three algorithms that achieve transparent $l$-diversity, and verify their effectiveness and efficiency through extensive experiments with real data.", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "generalization; $l$-diversity; Privacy-preserving data publishing", } @Article{U:2010:OMB, author = "Leong Hou U. 
and Kyriakos Mouratidis and Man Lung Yiu and Nikos Mamoulis", title = "Optimal matching between spatial datasets under capacity constraints", journal = j-TODS, volume = "35", number = "2", pages = "9:1--9:??", month = apr, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1735886.1735888", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 28 13:44:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Consider a set of {\em customers\/} (e.g., WiFi receivers) and a set of {\em service providers\/} (e.g., wireless access points), where each provider has a {\em capacity\/} and the quality of service offered to its customers is anti-proportional to their distance. The {\em Capacity Constrained Assignment\/} (CCA) is a matching between the two sets such that (i) each customer is assigned to at most one provider, (ii) every provider serves no more customers than its capacity, (iii) the maximum possible number of customers are served, and (iv) the sum of Euclidean distances within the assigned provider-customer pairs is minimized. Although max-flow algorithms are applicable to this problem, they require the complete distance-based bipartite graph between the customer and provider sets. For large spatial datasets, this graph is expensive to compute and it may be too large to fit in main memory. Motivated by this fact, we propose efficient algorithms for {\em optimal assignment\/} that employ novel edge-pruning strategies, based on the spatial properties of the problem. Additionally, we develop incremental techniques that maintain an optimal assignment (in the presence of updates) with a processing cost several times lower than CCA recomputation from scratch. 
Finally, we present {\em approximate\/} (i.e., suboptimal) CCA solutions that provide a tunable trade-off between result accuracy and computation cost, abiding by theoretical quality guarantees. A thorough experimental evaluation demonstrates the efficiency and practicality of the proposed techniques.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Optimal assignment; spatial databases", } @Article{Liu:2010:RSI, author = "Ziyang Liu and Yi Chen", title = "Return specification inference and result clustering for keyword search on {XML}", journal = j-TODS, volume = "35", number = "2", pages = "10:1--10:??", month = apr, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1735886.1735889", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 28 13:44:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Keyword search enables Web users to easily access XML data without the need to learn a structured query language and to study possibly complex data schemas. Existing work has addressed the problem of selecting qualified data nodes that match keywords and connecting them in a meaningful way, in the spirit of inferring the {\em where clause\/} in XQuery. However, how to infer the {\em return clause\/} for keyword searches is an open problem.\par To address this challenge, we present a keyword search engine for data-centric XML, XSeek, to infer the semantics of the search and identify return nodes effectively. XSeek recognizes possible entities and attributes inherently represented in the data. It also distinguishes between predicates and return specifications in query keywords. 
Then based on the analysis of both XML data structures and keyword patterns, XSeek generates return nodes. Furthermore, when the query is ambiguous and it is hard or impossible to determine the desirable return information, XSeek clusters the query results according to their semantics based on the user-specified granularity, and enables the user to easily browse and select the desired ones. Extensive experimental studies show the effectiveness and efficiency of XSeek.", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "keyword search; result clustering; XML", } @Article{Bex:2010:ICR, author = "Geert Jan Bex and Frank Neven and Thomas Schwentick and Stijn Vansummeren", title = "Inference of concise regular expressions and {DTDs}", journal = j-TODS, volume = "35", number = "2", pages = "11:1--11:??", month = apr, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1735886.1735890", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 28 13:44:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We consider the problem of inferring a concise Document Type Definition (DTD) for a given set of XML-documents, a problem that basically reduces to learning {\em concise\/} regular expressions from positive examples strings. We identify two classes of concise regular expressions --- the single occurrence regular expressions (SOREs) and the chain regular expressions (CHAREs) --- that capture the far majority of expressions used in practical DTDs. 
For the inference of SOREs we present several algorithms that first infer an automaton for a given set of example strings and then translate that automaton to a corresponding SORE, possibly repairing the automaton when no equivalent SORE can be found. In the process, we introduce a novel automaton to regular expression rewrite technique which is of independent interest. When only a very small amount of XML data is available, however (for instance when the data is generated by Web service requests or by answers to queries), these algorithms produce regular expressions that are too specific. Therefore, we introduce a novel learning algorithm crx that directly infers CHAREs (which form a subclass of SOREs) without going through an automaton representation. We show that crx performs very well within its target class on very small datasets.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Regular expressions; schema inference; XML", } @Article{DeCapitaniDiVimercati:2010:EPR, author = "Sabrina {De Capitani Di Vimercati} and Sara Foresti and Sushil Jajodia and Stefano Paraboschi and Pierangela Samarati", title = "Encryption policies for regulating access to outsourced data", journal = j-TODS, volume = "35", number = "2", pages = "12:1--12:??", month = apr, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1735886.1735891", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 28 13:44:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Current access control models typically assume that resources are under the strict custody of a trusted party which monitors each access request to verify if it is compliant with the specified access control policy. 
There are many scenarios where this approach is becoming no longer adequate. Many clear trends in Web technology are creating a need for owners of sensitive information to manage access to it by legitimate users using the services of {\em honest but curious\/} third parties, that is, parties trusted with providing the required service but not authorized to read the actual data content. In this scenario, the data owner encrypts the data before outsourcing and stores them at the server. Only the data owner and users with knowledge of the key will be able to decrypt the data. Possible access authorizations are to be enforced by the owner. In this article, we address the problem of enforcing selective access on outsourced data without need of involving the owner in the access control process. The solution puts forward a novel approach that combines cryptography with authorizations, thus enforcing access control via {\em selective encryption}. The article presents a formal model for access control management and illustrates how an authorization policy can be translated into an equivalent encryption policy while minimizing the amount of keys and cryptographic tokens to be managed. The article also introduces a two-layer encryption approach that allows the data owner to outsource, besides the data, the complete management of the authorization policy itself, thus providing efficiency and scalability in dealing with policy updates. We also discuss experimental results showing that our approach is able to efficiently manage complex scenarios.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Data outsourcing; encryption policy; privacy", } @Article{Koutrika:2010:PQB, author = "Georgia Koutrika and Yannis Ioannidis", title = "Personalizing queries based on networks of composite preferences", journal = j-TODS, volume = "35", number = "2", pages = "13:1--13:??", month = apr, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1735886.1735892", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 28 13:44:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "People's preferences are expressed at varying levels of granularity and detail as a result of partial or imperfect knowledge. One may have some preference for a general class of entities, for example, liking comedies, and another one for a fine-grained, specific class, such as disliking recent thrillers with Al Pacino. In this article, we are interested in capturing such complex, multi-granular preferences for personalizing database queries and in studying their impact on query results. We organize the collection of one's preferences in a {\em preference network\/} (a directed acyclic graph), where each node refers to a subclass of the entities that its parent refers to, and whenever they both apply, more specific preferences override more generic ones. We study query personalization based on networks of preferences and provide efficient algorithms for identifying relevant preferences, modifying queries accordingly, and processing personalized queries. 
Finally, we present results of both synthetic and real-user experiments, which: (a) demonstrate the efficiency of our algorithms, (b) provide insight as to the appropriateness of the proposed preference model, and (c) show the benefits of query personalization based on composite preferences compared to simpler preference representations.", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "personalization; Preference modeling; preference networks", } @Article{Flesca:2010:QRI, author = "Sergio Flesca and Filippo Furfaro and Francesco Parisi", title = "Querying and repairing inconsistent numerical databases", journal = j-TODS, volume = "35", number = "2", pages = "14:1--14:??", month = apr, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1735886.1735893", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 28 13:44:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The problem of extracting consistent information from relational databases violating integrity constraints on numerical data is addressed. In particular, aggregate constraints defined as linear inequalities on aggregate-sum queries on input data are considered. The notion of repair as consistent set of updates at attribute-value level is exploited, and the characterization of several data-complexity issues related to repairing data and computing consistent query answers is provided. Moreover, a method for computing ``reasonable'' repairs of inconsistent numerical databases is provided, for a restricted but expressive class of aggregate constraints. 
Several experiments are presented which assess the effectiveness of the proposed approach in real-life application scenarios.", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "aggregate constraints; consistent query answer; Inconsistent databases; repairs", } @Article{Liu:2010:CIE, author = "Hongyan Liu and Xiaoyu Wang and Yinghui Yang", title = "Comments on {``An integrated efficient solution for computing frequent and top-$k$ elements in data streams''}", journal = j-TODS, volume = "35", number = "2", pages = "15:1--15:??", month = apr, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1735886.1735894", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 28 13:44:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See \cite{Metwally:2006:IES}.", abstract = "We investigate a well-known algorithm, {\em Space-Saving\/} [Metwally et al. 2006], which has been proven efficient and effective at mining frequent elements in data streams. We discovered an error in one of the theorems in Metwally et al. [2006]. Experiments are conducted to illustrate the error.", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "approximate queries; data streams; frequent elements; top-$k$ elements; Zipfian distributions", } @Article{Graefe:2010:SBT, author = "Goetz Graefe", title = "A survey of {B}-tree locking techniques", journal = j-TODS, volume = "35", number = "3", pages = "16:1--16:??", month = jul, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1806907.1806908", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jul 28 15:53:01 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "B-trees have been ubiquitous in database management systems for several decades, and they are used in other storage systems as well. Their basic structure and basic operations are well and widely understood including search, insertion, and deletion. Concurrency control of operations in B-trees, however, is perceived as a difficult subject with many subtleties and special cases. The purpose of this survey is to clarify, simplify, and structure the topic of concurrency control in B-trees by dividing it into two subtopics and exploring each of them in depth.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Chen:2010:COI, author = "Su Chen and Mario A. 
Nascimento and Beng Chin Ooi and Kian-Lee Tan", title = "Continuous online index tuning in moving object databases", journal = j-TODS, volume = "35", number = "3", pages = "17:1--17:??", month = jul, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1806907.1806909", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jul 28 15:53:01 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In a {\em Moving Object Database\/} (MOD), the dataset, for example, the location of objects and their distribution, and the workload change frequently. Traditional static indexes are not able to cope well with such changes, that is, their effectiveness and efficiency are seriously affected. This calls for the development of novel indexes that can be reconfigured automatically based on the state of the system. In this article, we design and present the ST$^2$ B-tree, a {\em S\/}elf-{\em T\/}unable {\em S\/}patio-{\em T\/}emporal {\em B\/}$^+$ -tree index for MODs. In ST$^2$ B-tree, the data space is partitioned into regions of different density with respect to a set of reference points. Based on the density, objects in a region are managed using a grid of appropriate granularity; intuitively, a dense region employs a grid with fine granularity, while a sparse region uses a grid with coarse granularity. In this way, the ST$^2$ B-tree adapts itself to workload diversity in space. To enable online tuning, the ST$^2$ B-tree employs a ``multitree'' indexing technique. The underlying B$^+$-tree is logically divided into two subtrees. Objects are dispatched to either subtree depending on their last update time. The two subtrees are rebuilt periodically and alternately. 
Whenever a subtree is rebuilt, it is tuned to optimize performance by picking an appropriate setting (e.g., the set of reference points and grid granularity) based on the most recent data and workload. To cut down the overhead of rebuilding, we propose an eager update technique to construct the subtree. Finally, we present a tuning framework for the ST$^2$ B-tree, where the tuning is conducted online and automatically without human intervention, and without interfering with the regular functions of the MOD. We have implemented the tuning framework and the ST$^2$ B-tree, and conducted extensive performance evaluations. The results show that the self-tuning mechanism minimizes the degradation of performance caused by workload changes without any noticeable overhead.", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Data distribution; index tuning; location-based services; moving object indexing; self-tuning", } @Article{Hu:2010:PAL, author = "Haibo Hu and Jianliang Xu and Sai Tung On and Jing Du and Joseph Kee-Yin Ng", title = "Privacy-aware location data publishing", journal = j-TODS, volume = "35", number = "3", pages = "18:1--18:??", month = jul, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1806907.1806910", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jul 28 15:53:01 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article examines a new problem of $k$-anonymity with respect to a reference dataset in privacy-aware location data publishing: given a user dataset and a sensitive event dataset, we want to generalize the user dataset such that by joining it with the event dataset through location, each event is covered by at least $k$ users. 
Existing $k$ -anonymity algorithms generalize every $k$ user locations to the same vague value, regardless of the events. Therefore, they tend to overprotect against the privacy compromise and make the published data less useful. In this article, we propose a new generalization paradigm called {\em local enlargement}, as opposed to conventional hierarchy- or partition-based generalization. Local enlargement guarantees that user locations are enlarged just enough to cover all events $k$ times, and thus maximize the usefulness of the published data. We develop an $ O(H_n) $-approximate algorithm under the local enlargement paradigm, where $n$ is the maximum number of events a user could possibly cover and $ H_n $ is the Harmonic number of $n$. With strong pruning techniques and mathematical analysis, we show that it runs efficiently and that the generalized user locations are up to several orders of magnitude smaller than those by the existing algorithms. In addition, it is robust enough to protect against various privacy attacks.", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "$k$-anonymity; location privacy", } @Article{Liu:2010:IXS, author = "Ziyang Liu and Yu Huang and Yi Chen", title = "Improving {XML} search by generating and utilizing informative result snippets", journal = j-TODS, volume = "35", number = "3", pages = "19:1--19:??", month = jul, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1806907.1806911", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jul 28 15:53:01 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Snippets are used by almost every text search engine to complement the ranking scheme in order to effectively handle user searches, which are inherently ambiguous and whose relevance semantics are difficult to assess. Despite the fact that XML is a standard representation format of Web data, research on generating result snippets for XML search remains limited.\par To tackle this important yet open problem, in this article, we present a system extract which generates snippets for XML search results. We identify that a good XML result snippet should be a meaningful information unit of a small size that effectively summarizes this query result and differentiates it from others, according to which users can quickly assess the relevance of the query result. We have designed and implemented a novel algorithm to satisfy these requirements. Furthermore, we propose to cluster the query results based on their snippets. Since XML result clustering can only be done at query time, snippet-based clustering significantly improves the efficiency while compromising little clustering accuracy. We verified the efficiency and effectiveness of our approach through experiments.", acknowledgement = ack-nhfb, articleno = "19", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "clustering; keyword search; snippets; XML", } @Article{Tao:2010:EAN, author = "Yufei Tao and Ke Yi and Cheng Sheng and Panos Kalnis", title = "Efficient and accurate nearest neighbor and closest pair search in high-dimensional space", journal = j-TODS, volume = "35", number = "3", pages = "20:1--20:??", month = jul, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1806907.1806912", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jul 28 15:53:01 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Nearest Neighbor (NN) search in high-dimensional space is an important problem in many applications. From the database perspective, a good solution needs to have two properties: (i) it can be easily incorporated in a relational database, and (ii) its query cost should increase {\em sublinearly\/} with the dataset size, regardless of the data and query distributions. {\em Locality-Sensitive Hashing\/} (LSH) is a well-known methodology fulfilling both requirements, but its current implementations either incur expensive space and query cost, or abandon its theoretical guarantee on the quality of query results.\par Motivated by this, we improve LSH by proposing an access method called the {\em Locality-Sensitive B-tree\/} (LSB-tree) to enable fast, accurate, high-dimensional NN search in relational databases. The combination of several LSB-trees forms a {\em LSB-forest\/} that has strong quality guarantees, but improves dramatically the efficiency of the previous LSH implementation having the same guarantees. In practice, the LSB-tree itself is also an effective index which consumes linear space, supports efficient updates, and provides accurate query results. 
In our experiments, the LSB-tree was faster than: (i) iDistance (a famous technique for exact NN search) by two orders of magnitude, and (ii) MedRank (a recent approximate method with nontrivial quality guarantees) by one order of magnitude, and meanwhile returned much better results.\par As a second step, we extend our LSB technique to solve another classic problem, called Closest Pair (CP) search, in high-dimensional space. The long-term challenge for this problem has been to achieve {\em subquadratic\/} running time at very high dimensionalities, which fails most of the existing solutions. We show that, using a LSB-forest, CP search can be accomplished in (worst-case) time significantly lower than the quadratic complexity, yet still ensuring very good quality. In practice, accurate answers can be found using just two LSB-trees, thus giving a substantial reduction in the space and running time. In our experiments, our technique was faster: (i) than distance browsing (a well-known method for solving the problem exactly) by several orders of magnitude, and (ii) than D-shift (an approximate approach with theoretical guarantees in low-dimensional space) by one order of magnitude, and at the same time, outputs better results.", acknowledgement = ack-nhfb, articleno = "20", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "closest pair search; Locality-sensitive hashing; nearest neighbor search", } @Article{Nash:2010:VQD, author = "Alan Nash and Luc Segoufin and Victor Vianu", title = "Views and queries: {Determinacy} and rewriting", journal = j-TODS, volume = "35", number = "3", pages = "21:1--21:??", month = jul, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1806907.1806913", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jul 28 15:53:01 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We investigate the question of whether a query $Q$ can be answered using a set $V$ of views. We first define the problem in information-theoretic terms: we say that $V$ determines $Q$ if $V$ provides enough information to uniquely determine the answer to $Q$. Next, we look at the problem of rewriting $Q$ in terms of $V$ using a specific language. Given a view language $V$ and query language $Q$, we say that a rewriting language $R$ is complete for $V$-to-$Q$ rewritings if every $ Q \in Q $ can be rewritten in terms of $ V \in V $ using a query in $R$, whenever $V$ determines $Q$. While query rewriting using views has been extensively investigated for some specific languages, the connection to the information-theoretic notion of determinacy, and the question of completeness of a rewriting language have received little attention. In this article we investigate systematically the notion of determinacy and its connection to rewriting. The results concern decidability of determinacy for various view and query languages, as well as the power required of complete rewriting languages. We consider languages ranging from first-order to conjunctive queries.", acknowledgement = ack-nhfb, articleno = "21", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "Queries; rewriting; views", } @Article{Denecker:2010:TLR, author = "Marc Denecker and {\'A}lvaro Cort{\'e}s-Calabuig and Maurice Bruynooghe and Ofer Arieli", title = "Towards a logical reconstruction of a theory for locally closed databases", journal = j-TODS, volume = "35", number = "3", pages = "22:1--22:??", month = jul, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1806907.1806914", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jul 28 15:53:01 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The {\em Closed World Assumption\/} (CWA) on databases expresses the assumption that an atom not in the database is false. This assumption is applicable only in cases where the database has complete knowledge about the domain of discourse. In this article, we investigate {\em locally closed\/} databases, that is: databases that are sound but partially incomplete about their domain. Such databases consist of a standard database instance, augmented with a collection of {\em Local Closed World Assumptions\/} (LCWAs). A LCWA is a ``local'' form of the CWA, expressing that a database relation is complete in a certain area, called a {\em window of expertise}. In this work, we study locally closed databases both from a knowledge representation and from a computational perspective. At the representation level, the approach taken in this article distinguishes between the data that is conveyed by a database and the metaknowledge about the area in which the data is complete. We study the semantics of the LCWA's and relate it to several knowledge representation formalisms. 
At the reasoning level, we study the complexity of, and algorithms for two basic reasoning tasks: computing {\em certain\/} and {\em possible\/} answers to queries and determining whether a database has complete knowledge on a query. As the complexity of these tasks is unacceptably high, we develop efficient {\em approximate\/} methods for query answering. We also prove that for useful classes of queries and locally closed databases, these methods are {\em optimal}, and thus they solve the original query in a tractable way. As a result, we obtain classes of queries and locally closed databases for which query answering is tractable.", acknowledgement = ack-nhfb, articleno = "22", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", keywords = "closed world assumption; Databases; locally closed databases", } @Article{Ozsoyoglu:2010:FTI, author = "Z. Meral {\"O}zsoyoglu", title = "Foreword to {TODS} invited papers issue", journal = j-TODS, volume = "35", number = "4", pages = "23:1--23:??", month = nov, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1862919.1862920", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Dec 15 10:34:39 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "23", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ivanova:2010:ARI, author = "Milena G. Ivanova and Martin L. Kersten and Niels J. Nes and Romulo A. P. 
Gon{\c{c}}alves", title = "An architecture for recycling intermediates in a column-store", journal = j-TODS, volume = "35", number = "4", pages = "24:1--24:??", month = nov, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1862919.1862921", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Dec 15 10:34:39 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Automatic recycling of intermediate results to improve both query response time and throughput is a grand challenge for state-of-the-art databases. Tuples are loaded and streamed through a tuple-at-a-time processing pipeline, avoiding materialization of intermediates as much as possible. This limits the opportunities for reuse of overlapping computations to DBA-defined materialized views and function/result cache tuning. In contrast, the operator-at-a-time execution paradigm produces fully materialized results in each step of the query plan. To avoid resource contention, these intermediates are evicted as soon as possible. In this article we study an architecture that harvests the byproducts of the operator-at-a-time paradigm in a column-store system using a lightweight mechanism, the recycler.", acknowledgement = ack-nhfb, articleno = "24", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ghoting:2010:EAS, author = "Amol Ghoting and Konstantin Makarychev", title = "{I/O} efficient algorithms for serial and parallel suffix tree construction", journal = j-TODS, volume = "35", number = "4", pages = "25:1--25:??", month = nov, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1862919.1862922", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Dec 15 10:34:39 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Over the past three decades, the suffix tree has served as a fundamental data structure in string processing. However, its widespread applicability has been hindered due to the fact that suffix tree construction does not scale well with the size of the input string. With advances in data collection and storage technologies, large strings have become ubiquitous, especially across emerging applications involving text, time series, and biological sequence data. To benefit from these advances, it is imperative that we have a scalable suffix tree construction algorithm. The past few years have seen the emergence of several disk-based suffix tree construction algorithms.", acknowledgement = ack-nhfb, articleno = "25", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Berinde:2010:SOH, author = "Radu Berinde and Piotr Indyk and Graham Cormode and Martin J. 
Strauss", title = "Space-optimal heavy hitters with strong error bounds", journal = j-TODS, volume = "35", number = "4", pages = "26:1--26:??", month = nov, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1862919.1862923", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Dec 15 10:34:39 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The problem of finding heavy hitters and approximating the frequencies of items is at the heart of many problems in data stream analysis. It has been observed that several proposed solutions to this problem can outperform their worst-case guarantees on real data. This leads to the question of whether some stronger bounds can be guaranteed. We answer this in the positive by showing that a class of counter-based algorithms (including the popular and very space-efficient Frequent and SpaceSaving algorithms) provides much stronger approximation guarantees than previously known. Specifically, we show that errors in the approximation of individual elements do not depend on the frequencies of the most frequent elements, but only on the frequency of the remaining tail.", acknowledgement = ack-nhfb, articleno = "26", ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fan:2010:RIC, author = "Wenfei Fan and Floris Geerts", title = "Relative information completeness", journal = j-TODS, volume = "35", number = "4", pages = "27:1--27:??", month = nov, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1862919.1862924", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Dec 15 10:34:39 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article investigates the question of whether a partially closed database has complete information to answer a query. In practice an enterprise often maintains master data $ D_m $, a closed-world database. We say that a database $D$ is partially closed if it satisfies a set $V$ of containment constraints of the form $ q(D) \subset p(D_m) $, where $q$ is a query in a language $ L_C $ and $p$ is a projection query. The part of $D$ not constrained by $ (D_m, V) $ is open, from which some tuples may be missing. The database $D$ is said to be complete for a query $Q$ relative to $ (D_m, V) $ if for all partially closed extensions $ D' $ of $D$, $ Q(D') = Q(D) $, i.e., adding tuples to $D$ either violates some constraints in $V$ or does not change the answer to $Q$.", acknowledgement = ack-nhfb, articleno = "27", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fletcher:2010:TTS, author = "George H. L.
Fletcher and Jan {Van Den Bussche} and Dirk {Van Gucht} and Stijn Vansummeren", title = "Towards a theory of search queries", journal = j-TODS, volume = "35", number = "4", pages = "28:1--28:??", month = nov, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1862919.1862925", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Dec 15 10:34:39 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The need to manage diverse information sources has triggered the rise of very loosely structured data models, known as dataspace models. Such information management systems must allow querying in simple ways, mostly by a form of searching. Motivated by these developments, we propose a theory of search queries in a general model of dataspaces. In this model, a dataspace is a collection of data objects, where each data object is a collection of data items. Basic search queries are expressed using filters on data items, following the basic model of Boolean search in information retrieval. We characterize semantically the class of queries that can be expressed by searching.", acknowledgement = ack-nhfb, articleno = "28", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bjorklund:2010:IXE, author = "Henrik Bj{\"o}rklund and Wouter Gelade and Wim Martens", title = "Incremental {XPath} evaluation", journal = j-TODS, volume = "35", number = "4", pages = "29:1--29:??", month = nov, year = "2010", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1862919.1862926", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Dec 15 10:34:39 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Incremental view maintenance for XPath queries asks to maintain a materialized XPath view over an XML database. It assumes an underlying XML database D and a query Q. One is given a sequence of updates U to D, and the problem is to compute the result of Q(U(D)): the result of evaluating query Q on database D after having applied updates U. This article initiates a systematic study of the Boolean version of this problem. In the Boolean version, one only wants to know whether Q(U(D)) is empty or not. In order to quickly answer this question, we are allowed to maintain an auxiliary data structure.", acknowledgement = ack-nhfb, articleno = "29", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Wang:2011:OPD, author = "Ting Wang and Ling Liu", title = "Output privacy in data mining", journal = j-TODS, volume = "36", number = "1", pages = "1:1--1:??", month = mar, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1929934.1929935", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 16 09:42:23 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Privacy has been identified as a vital requirement in designing and implementing data mining systems. In general, privacy preservation demands protecting both input and output privacy: the former refers to sanitizing the raw data itself before performing mining; while the latter refers to preventing the mining output (models or patterns) from malicious inference attacks.", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Nergiz:2011:IA, author = "Mehmet Ercan Nergiz and Acar Tamersoy and Yucel Saygin", title = "Instant anonymization", journal = j-TODS, volume = "36", number = "1", pages = "2:1--2:??", month = mar, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1929934.1929936", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 16 09:42:23 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Anonymization-based privacy protection ensures that data cannot be traced back to individuals. Researchers working in this area have proposed a wide variety of anonymization algorithms, many of which require a considerable number of database accesses. 
This is a problem of efficiency, especially when the released data is subject to visualization or when the algorithm needs to be run many times to get an acceptable ratio of privacy/utility. In this paper, we present two instant anonymization algorithms for the privacy metrics k-anonymity and $ \ell $-diversity. Proposed algorithms minimize the number of data accesses by utilizing the summary structure already maintained by the database management system for query selectivity.", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Gross-Amblard:2011:QPW, author = "David Gross-Amblard", title = "Query-preserving watermarking of relational databases and {XML} documents", journal = j-TODS, volume = "36", number = "1", pages = "3:1--3:??", month = mar, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1929934.1929937", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 16 09:42:23 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Watermarking allows robust and unobtrusive insertion of information in a digital document. During the last few years, techniques have been proposed for watermarking relational databases or Xml documents, where information insertion must preserve a specific measure on data (for example the mean and variance of numerical attributes). In this article we investigate the problem of watermarking databases or Xml while preserving a set of parametric queries in a specified language, up to an acceptable distortion. We first show that unrestricted databases can not be watermarked while preserving trivial parametric queries. 
We then exhibit query languages and classes of structures that allow guaranteed watermarking capacity, namely (1) local query languages on structures with bounded degree Gaifman graph, and (2) monadic second-order queries on trees or treelike structures.", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Termehchy:2011:USI, author = "Arash Termehchy and Marianne Winslett", title = "Using structural information in {XML} keyword search effectively", journal = j-TODS, volume = "36", number = "1", pages = "4:1--4:??", month = mar, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1929934.1929938", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 16 09:42:23 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The popularity of XML has exacerbated the need for an easy-to-use, high precision query interface for XML data. When traditional document-oriented keyword search techniques do not suffice, natural language interfaces and keyword search techniques that take advantage of XML structure make it very easy for ordinary users to query XML databases. Unfortunately, current approaches to processing these queries rely heavily on heuristics that are intuitively appealing but ultimately ad hoc. These approaches often retrieve false positive answers, overlook correct answers, and cannot rank answers appropriately. To address these problems for data-centric XML, we propose coherency ranking (CR), a domain- and database design-independent ranking method for XML keyword queries that is based on an extension of the concepts of data dependencies and mutual information.", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cautis:2011:QXD, author = "Bogdan Cautis and Alin Deutsch and Nicola Onose and Vasilis Vassalos", title = "Querying {XML} data sources that export very large sets of views", journal = j-TODS, volume = "36", number = "1", pages = "5:1--5:??", month = mar, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1929934.1929939", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 16 09:42:23 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study the problem of querying XML data sources that accept only a limited set of queries, such as sources accessible by Web services which can implement very large (potentially infinite) families of XPath queries. To compactly specify such families of queries we adopt the Query Set Specifications, a formalism close to context-free grammars. We say that query Q is expressible by the specification P if it is equivalent to some expansion of P. Q is supported by P if it has an equivalent rewriting using some finite set of P's expansions. We study the complexity of expressibility and support and identify large classes of XPath queries for which there are efficient (PTIME) algorithms.", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Liu:2011:GSW, author = "Ziyang Liu and Susan B. 
Davidson and Yi Chen", title = "Generating sound workflow views for correct provenance analysis", journal = j-TODS, volume = "36", number = "1", pages = "6:1--6:??", month = mar, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1929934.1929940", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 16 09:42:23 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Workflow views abstract groups of tasks in a workflow into high level composite tasks, in order to reuse subworkflows and facilitate provenance analysis. However, unless a view is carefully designed, it may not preserve the dataflow between tasks in the workflow, that is, it may not be sound. Unsound views can be misleading and cause incorrect provenance analysis. This article studies the problem of efficiently identifying and correcting unsound workflow views with minimal changes, and constructing minimal sound and elucidative workflow views with a set of user-specified relevant tasks. In particular, two related problems are investigated. First, given a workflow view, we wish to split each unsound composite task into the minimal number of tasks, such that the resulting view is sound.", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jin:2011:PTE, author = "Ruoming Jin and Ning Ruan and Yang Xiang and Haixun Wang", title = "Path-tree: {An} efficient reachability indexing scheme for large directed graphs", journal = j-TODS, volume = "36", number = "1", pages = "7:1--7:??", month = mar, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1929934.1929941", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 16 09:42:23 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Reachability query is one of the fundamental queries in graph database. The main idea behind answering reachability queries is to assign vertices with certain labels such that the reachability between any two vertices can be determined by the labeling information. Though several approaches have been proposed for building these reachability labels, it remains open issues on how to handle increasingly large number of vertices in real-world graphs, and how to find the best tradeoff among the labeling size, the query answering time, and the construction time. In this article, we introduce a novel graph structure, referred to as path-tree, to help labeling very large graphs.", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Law:2011:RLD, author = "Yan-Nei Law and Haixun Wang and Carlo Zaniolo", title = "Relational languages and data models for continuous queries on sequences and data streams", journal = j-TODS, volume = "36", number = "2", pages = "8:1--8:??", month = may, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1966385.1966386", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 3 18:41:49 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Most data stream management systems are based on extensions of the relational data model and query languages, but rigorous analyses of the problems and limitations of this approach, and how to overcome them, are still wanting. In this article, we elucidate the interaction between stream-oriented extensions of the relational model and continuous query language constructs, and show that the resulting expressive power problems are even more serious for data streams than for databases. In particular, we study the loss of expressive power caused by the loss of blocking query operators, and characterize nonblocking queries as monotonic functions on the database. Thus we introduce the notion of NB-completeness to assure that a query language is as suitable for continuous queries as it is for traditional database queries.", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Gao:2011:CNN, author = "Yunjun Gao and Baihua Zheng and Gang Chen and Chun Chen and Qing Li", title = "Continuous nearest-neighbor search in the presence of obstacles", journal = j-TODS, volume = "36", number = "2", pages = "9:1--9:??", month = may, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1966385.1966387", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 3 18:41:49 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Despite the ubiquity of physical obstacles (e.g., buildings, hills, and blindages, etc.) in the real world, most of spatial queries ignore the obstacles. In this article, we study a novel form of continuous nearest-neighbor queries in the presence of obstacles, namely continuous obstructed nearest-neighbor (CONN) search, which considers the impact of obstacles on the distance between objects. Given a data set $P$, an obstacle set $O$, and a query line segment $q$, in a two-dimensional space, a CONN query retrieves the nearest neighbor $ p \in P $ of each point $ p^\prime $ on $q$ according to the obstructed distance, the shortest path between $p$ and $ p^\prime $ without crossing any obstacle in $O$.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Yiu:2011:DAR, author = "Man Lung Yiu and Christian S. 
Jensen and Jesper M{\o}ller and Hua Lu", title = "Design and analysis of a ranking approach to private location-based services", journal = j-TODS, volume = "36", number = "2", pages = "10:1--10:??", month = may, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1966385.1966388", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 3 18:41:49 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Users of mobile services wish to retrieve nearby points of interest without disclosing their locations to the services. This article addresses the challenge of optimizing the query performance while satisfying given location privacy and query accuracy requirements. The article's proposal, SpaceTwist, aims to offer location privacy for k nearest neighbor (kNN) queries at low communication cost without requiring a trusted anonymizer. The solution can be used with a conventional DBMS as well as with a server optimized for location-based services. In particular, we believe that this is the first solution that expresses the server-side functionality in a single SQL statement. In its basic form, SpaceTwist utilizes well-known incremental NN query processing on the server.", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fagin:2011:RDE, author = "Ronald Fagin and Phokion G. 
Kolaitis and Lucian Popa and Wang-Chiew Tan", title = "Reverse data exchange: {Coping} with nulls", journal = j-TODS, volume = "36", number = "2", pages = "11:1--11:??", month = may, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1966385.1966389", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 3 18:41:49 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "An inverse of a schema mapping M is intended to undo what M does, thus providing a way to perform reverse data exchange. In recent years, three different formalizations of this concept have been introduced and studied, namely the notions of an inverse of a schema mapping, a quasi-inverse of a schema mapping, and a maximum recovery of a schema mapping. The study of these notions has been carried out in the context in which source instances are restricted to consist entirely of constants, while target instances may contain both constants and labeled nulls. This restriction on source instances is crucial for obtaining some of the main technical results about these three notions, but, at the same time, limits their usefulness, since reverse data exchange naturally leads to source instances that may contain both constants and labeled nulls.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Atallah:2011:AEA, author = "Mikhail J. 
Atallah and Yinian Qi and Hao Yuan", title = "Asymptotically efficient algorithms for skyline probabilities of uncertain data", journal = j-TODS, volume = "36", number = "2", pages = "12:1--12:28", month = may, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1966385.1966390", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 3 18:41:49 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Skyline computation is widely used in multicriteria decision making. As research in uncertain databases draws increasing attention, skyline queries with uncertain data have also been studied. Some earlier work focused on probabilistic skylines with a given threshold; Atallah and Qi [2009] studied the problem to compute skyline probabilities for all instances of uncertain objects without the use of thresholds, and proposed an algorithm with subquadratic time complexity. In this work, we propose a new algorithm for computing all skyline probabilities that is asymptotically faster: worst-case $ O(n \sqrt {n} \log n) $ time and $ O(n) $ space for 2D data; $ O(n^{2 - 1 / d} \log^{d - 1} n) $ time and $ O(n \log^{d - 2} n) $ space for $d$-dimensional data. Furthermore, we study the online version of the problem: Given any query point $p$ (unknown until the query time), return the probability that no instance in the given data set dominates $p$. We propose an algorithm for answering such an online query for $d$-dimensional data in $ O(n^{1 - 1 / d} \log^{d - 1} n) $ time after preprocessing the data in $ O(n^{2 - 1 / d} \log^{d - 1} n) $ time and space.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Lee:2011:RAF, author = "Tony T.
Lee and Tong Ye", title = "A relational approach to functional decomposition of logic circuits", journal = j-TODS, volume = "36", number = "2", pages = "13:1--13:??", month = may, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1966385.1966391", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 3 18:41:49 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Functional decomposition of Boolean functions has a profound influence on all quality aspects of cost-effectively implementing modern digital systems and data-mining. The relational databases are multivalued tables, which include any truth tables of logic functions as special cases. In this article, we propose a relational database approach to the decomposition of logic circuits. The relational algebra consists of a set of well-defined algebraic operations that can be performed on multivalued tables. Our approach shows that the functional decomposition of logic circuits is similar to the normalization of relational databases; they are governed by the same concepts of functional dependency (FD) and multivalued dependency (MVD).", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Hernich:2011:CWD, author = "Andr{\'e} Hernich and Leonid Libkin and Nicole Schweikardt", title = "Closed world data exchange", journal = j-TODS, volume = "36", number = "2", pages = "14:1--14:40", month = may, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/1966385.1966392", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 3 18:41:49 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Data exchange deals with translating data structured in some source format into data structured in some target format, given a specification of the relationship between the source and the target and possibly constraints on the target; and answering queries over the target in a way that is semantically consistent with the information in the source. Theoretical foundations of data exchange have been actively explored recently. It was also noticed that the standard semantics for query answering in data exchange may lead to counterintuitive or anomalous answers. In the present article, we explain that this behavior is due to the fact that solutions can contain invented information (information that is not related to the source instance), and that the presence of incomplete information in target instances has been ignored.", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Xiao:2011:ESJ, author = "Chuan Xiao and Wei Wang and Xuemin Lin and Jeffrey Xu Yu and Guoren Wang", title = "Efficient similarity joins for near-duplicate detection", journal = j-TODS, volume = "36", number = "3", pages = "15:1--15:41", month = aug, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2000824.2000825", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Aug 23 18:27:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "With the increasing amount of data and the need to integrate data from multiple data sources, one of the challenging issues is to identify near-duplicate records efficiently. In this article, we focus on efficient algorithms to find a pair of records such that their similarities are no less than a given threshold. Several existing algorithms rely on the prefix filtering principle to avoid computing similarity values for all possible pairs of records. We propose new filtering techniques by exploiting the token ordering information; they are integrated into the existing methods and drastically reduce the candidate sizes and hence improve the efficiency.", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Song:2011:DDR, author = "Shaoxu Song and Lei Chen", title = "Differential dependencies: Reasoning and discovery", journal = j-TODS, volume = "36", number = "3", pages = "16:1--16:41", month = aug, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2000824.2000826", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Aug 23 18:27:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See comments \cite{Vincent:2015:TCD} and response \cite{Song:2017:RDD}.", abstract = "The importance of difference semantics (e.g., ``similar'' or ``dissimilar'') has been recently recognized for declaring dependencies among various types of data, such as numerical values or text values. We propose a novel form of Differential Dependencies (dds), which specifies constraints on difference, called differential functions, instead of identification functions in traditional dependency notations like functional dependencies. Informally, a differential dependency states that if two tuples have distances on attributes X agreeing with a certain differential function, then their distances on attributes Y should also agree with the corresponding differential function on Y. For example, [date($ \leq 7 $ )] $ \rightarrow $ [price($ < 100 $ )] states that the price difference of any two days within a week length should be no greater than 100 dollars.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Papapetrou:2011:EBS, author = "Panagiotis Papapetrou and Vassilis Athitsos and Michalis Potamias and George Kollios and Dimitrios Gunopulos", title = "Embedding-based subsequence matching in time-series databases", journal = j-TODS, volume = "36", number = "3", pages = "17:1--17:39", month = aug, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2000824.2000827", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Aug 23 18:27:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We propose an embedding-based framework for subsequence matching in time-series databases that improves the efficiency of processing subsequence matching queries under the Dynamic Time Warping (DTW) distance measure. This framework partially reduces subsequence matching to vector matching, using an embedding that maps each query sequence to a vector and each database time series into a sequence of vectors. The database embedding is computed offline, as a preprocessing step. At runtime, given a query object, an embedding of that object is computed online. Relatively few areas of interest are efficiently identified in the database sequences by comparing the embedding of the query with the database vectors.", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jampani:2011:MCD, author = "Ravi Jampani and Fei Xu and Mingxi Wu and Luis Perez and Chris Jermaine and Peter J. 
Haas", title = "The {Monte Carlo} database system: Stochastic analysis close to the data", journal = j-TODS, volume = "36", number = "3", pages = "18:1--18:41", month = aug, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2000824.2000828", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Aug 23 18:27:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The application of stochastic models and analysis techniques to large datasets is now commonplace. Unfortunately, in practice this usually means extracting data from a database system into an external tool (such as SAS, R, Arena, or Matlab), and then running the analysis there. This extract-and-model paradigm is typically error-prone, slow, does not support fine-grained modeling, and discourages what-if and sensitivity analyses. In this article we describe MCDB, a database system that permits a wide spectrum of stochastic models to be used in conjunction with the data stored in a large database, without ever extracting the data. MCDB facilitates in-database execution of tasks such as risk assessment, prediction, and imputation of missing data, as well as management of errors due to data integration, information extraction, and privacy-preserving data anonymization.", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Stefanidis:2011:SRC, author = "Kostas Stefanidis and Georgia Koutrika and Evaggelia Pitoura", title = "A survey on representation, composition and application of preferences in database systems", journal = j-TODS, volume = "36", number = "3", pages = "19:1--19:45", month = aug, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2000824.2000829", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Aug 23 18:27:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Preferences have been traditionally studied in philosophy, psychology, and economics and applied to decision making problems. Recently, they have attracted the attention of researchers in other fields, such as databases where they capture soft criteria for queries. Databases bring a whole fresh perspective to the study of preferences, both computational and representational. From a representational perspective, the central question is how we can effectively represent preferences and incorporate them in database querying. From a computational perspective, we can look at how we can efficiently process preferences in the context of database queries. Several approaches have been proposed but a systematic study of these works is missing.", acknowledgement = ack-nhfb, articleno = "19", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ozsoyoglu:2011:FTI, author = "Z. 
Meral {\"O}zsoyoglu", title = "Foreword to {TODS} invited papers issue 2011", journal = j-TODS, volume = "36", number = "4", pages = "20:1--20:2", month = dec, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2043652.2043653", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 20 07:23:02 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "20", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cheng:2011:FMC, author = "James Cheng and Yiping Ke and Ada Wai-Chee Fu and Jeffrey Xu Yu and Linhong Zhu", title = "Finding maximal cliques in massive networks", journal = j-TODS, volume = "36", number = "4", pages = "21:1--21:34", month = dec, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2043652.2043654", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 20 07:23:02 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Maximal clique enumeration is a fundamental problem in graph theory and has important applications in many areas such as social network analysis and bioinformatics. The problem is extensively studied; however, the best existing algorithms require memory space linear in the size of the input graph. This has become a serious concern in view of the massive volume of today's fast-growing networks. We propose a general framework for designing external-memory algorithms for maximal clique enumeration in large graphs. 
The general framework enables maximal clique enumeration to be processed recursively in small subgraphs of the input graph, thus allowing in-memory computation of maximal cliques without the costly random disk access.", acknowledgement = ack-nhfb, articleno = "21", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kim:2011:DFA, author = "Changkyu Kim and Jatin Chhugani and Nadathur Satish and Eric Sedlar and Anthony D. Nguyen and Tim Kaldewey and Victor W. Lee and Scott A. Brandt and Pradeep Dubey", title = "Designing fast architecture-sensitive tree search on modern multicore\slash many-core processors", journal = j-TODS, volume = "36", number = "4", pages = "22:1--22:34", month = dec, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2043652.2043655", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 20 07:23:02 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In-memory tree structured index search is a fundamental database operation. Modern processors provide tremendous computing power by integrating multiple cores, each with wide vector units. There has been much work to exploit modern processor architectures for database primitives like scan, sort, join, and aggregation. However, unlike other primitives, tree search presents significant challenges due to irregular and unpredictable data accesses in tree traversal. In this article, we present FAST, an extremely fast architecture-sensitive layout of the index tree. FAST is a binary tree logically organized to optimize for architecture features like page size, cache line size, and Single Instruction Multiple Data (SIMD) width of the underlying hardware.", acknowledgement = ack-nhfb, articleno = "22", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Alexe:2011:CSM, author = "Bogdan Alexe and Balder ten Cate and Phokion G. Kolaitis and Wang-Chiew Tan", title = "Characterizing schema mappings via data examples", journal = j-TODS, volume = "36", number = "4", pages = "23:1--23:48", month = dec, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2043652.2043656", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 20 07:23:02 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Schema mappings are high-level specifications that describe the relationship between two database schemas; they are considered to be the essential building blocks in data exchange and data integration, and have been the object of extensive research investigations. Since in real-life applications schema mappings can be quite complex, it is important to develop methods and tools for understanding, explaining, and refining schema mappings. A promising approach to this effect is to use ``good'' data examples that illustrate the schema mapping at hand. We develop a foundation for the systematic investigation of data examples and obtain a number of results on both the capabilities and the limitations of data examples in explaining and understanding schema mappings.", acknowledgement = ack-nhfb, articleno = "23", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cohen:2011:BET, author = "Sara Cohen and Yaacov Y. 
Weiss", title = "Bag equivalence of tree patterns", journal = j-TODS, volume = "36", number = "4", pages = "24:1--24:35", month = dec, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2043652.2043657", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 20 07:23:02 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "When a query is evaluated under bag semantics, each answer is returned as many times as it has derivations. Bag semantics has long been recognized as important, especially when aggregation functions will be applied to query results. This article is the first to focus on bag semantics for tree pattern queries. In particular, the problem of bag equivalence of a large class of tree pattern queries (which can be used to model XPath) is explored. The queries can contain unions, branching, label wildcards, the vertical child and descendant axes, the horizontal following and following-sibling axes, as well as positional (i.e., first and last) axes.", acknowledgement = ack-nhfb, articleno = "24", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Abiteboul:2011:CCD, author = "Serge Abiteboul and T.-H. Hubert Chan and Evgeny Kharlamov and Werner Nutt and Pierre Senellart", title = "Capturing continuous data and answering aggregate queries in probabilistic {XML}", journal = j-TODS, volume = "36", number = "4", pages = "25:1--25:45", month = dec, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2043652.2043658", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 20 07:23:02 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Sources of data uncertainty and imprecision are numerous. 
A way to handle this uncertainty is to associate probabilistic annotations to data. Many such probabilistic database models have been proposed, both in the relational and in the semi-structured setting. The latter is particularly well adapted to the management of uncertain data coming from a variety of automatic processes. An important problem, in the context of probabilistic XML databases, is that of answering aggregate queries (count, sum, avg, etc.), which has received limited attention so far. In a model unifying the various (discrete) semi-structured probabilistic models studied up to now, we present algorithms to compute the distribution of the aggregation values (exploiting some regularity properties of the aggregate functions) and probabilistic moments (especially expectation and variance) of this distribution.", acknowledgement = ack-nhfb, articleno = "25", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bai:2011:CPT, author = "Xiao Bai and Rachid Guerraoui and Anne-Marie Kermarrec and Vincent Leroy", title = "Collaborative personalized top-$k$ processing", journal = j-TODS, volume = "36", number = "4", pages = "26:1--26:38", month = dec, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2043652.2043659", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 20 07:23:02 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article presents P4Q, a fully decentralized gossip-based protocol to personalize query processing in social tagging systems. P4Q dynamically associates each user with social acquaintances sharing similar tagging behaviors. 
Queries are gossiped among such acquaintances, computed on-the-fly in a collaborative, yet partitioned manner, and results are iteratively refined and returned to the querier. Analytical and experimental evaluations convey the scalability of P4Q for top-k query processing, as well as its inherent ability to cope with users updating profiles and departing.", acknowledgement = ack-nhfb, articleno = "26", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Goncalves:2011:DCQ, author = "Romulo Goncalves and Martin Kersten", title = "The {Data Cyclotron} query processing scheme", journal = j-TODS, volume = "36", number = "4", pages = "27:1--27:35", month = dec, year = "2011", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2043652.2043660", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 20 07:23:02 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A grand challenge of distributed query processing is to devise a self-organizing architecture which exploits all hardware resources optimally to manage the database hot set, minimize query response time, and maximize throughput without single point global coordination. The Data Cyclotron architecture [Goncalves and Kersten 2010] addresses this challenge using turbulent data movement through a storage ring built from distributed main memory and capitalizing on the functionality offered by modern remote-DMA network facilities. Queries assigned to individual nodes interact with the storage ring by picking up data fragments, which are continuously flowing around, that is, the hot set.
The storage ring is steered by the Level Of Interest (LOI) attached to each data fragment, which represents the cumulative query interest as it passes around the ring multiple times.", acknowledgement = ack-nhfb, articleno = "27", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Graefe:2012:SBT, author = "Goetz Graefe", title = "A survey of {B}-tree logging and recovery techniques", journal = j-TODS, volume = "37", number = "1", pages = "1:1--1:35", month = feb, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2109196.2109197", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:17 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "B-trees have been ubiquitous in database management systems for several decades, and they serve in many other storage systems as well. Their basic structure and their basic operations are well understood including search, insertion, and deletion. However, implementation of transactional guarantees such as all-or-nothing failure atomicity and durability in spite of media and system failures seems to be difficult. High-performance techniques such as pseudo-deleted records, allocation-only logging, and transaction processing during crash recovery are widely used in commercial B-tree implementations but not widely understood. This survey collects many of these techniques as a reference for students, researchers, system architects, and software developers. Central in this discussion are physical data independence, separation of logical database contents and physical representation, and the concepts of user transactions and system transactions. Many of the techniques discussed are applicable beyond B-trees.", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Martinenghi:2012:PMR, author = "Davide Martinenghi and Marco Tagliasacchi", title = "Proximity measures for rank join", journal = j-TODS, volume = "37", number = "1", pages = "2:1--2:46", month = feb, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2109196.2109198", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:17 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We introduce the proximity rank join problem, where we are given a set of relations whose tuples are equipped with a score and a real-valued feature vector. Given a target feature vector, the goal is to return the K combinations of tuples with high scores that are as close as possible to the target and to each other, according to some notion of distance or dissimilarity. The setting closely resembles that of traditional rank join, but the geometry of the vector space plays a distinctive role in the computation of the overall score of a combination. Also, the input relations typically return their results either by distance from the target or by score. Because of these aspects, it turns out that traditional rank join algorithms, such as the well-known HRJN, have shortcomings in solving the proximity rank join problem, as they may read more input than needed. To overcome this weakness, we define a tight bound (used as a stopping criterion) that guarantees instance optimality, that is, an I/O cost is achieved that is always within a constant factor of optimal. The tight bound can also be used to drive an adaptive pulling strategy, deciding at each step which relation to access next. For practically relevant classes of problems, we show how to compute the tight bound efficiently. 
An extensive experimental study validates our results and demonstrates significant gains over existing solutions.", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Nuray-Turan:2012:AOS, author = "Rabia Nuray-Turan and Dmitri V. Kalashnikov and Sharad Mehrotra and Yaming Yu", title = "Attribute and object selection queries on objects with probabilistic attributes", journal = j-TODS, volume = "37", number = "1", pages = "3:1--3:??", month = feb, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2109196.2109199", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:17 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Modern data processing techniques such as entity resolution, data cleaning, information extraction, and automated tagging often produce results consisting of objects whose attributes may contain uncertainty. This uncertainty is frequently captured in the form of a set of multiple mutually exclusive value choices for each uncertain attribute along with a measure of probability for alternative values. However, the lay end-user, as well as some end-applications, might not be able to interpret the results if outputted in such a form. Thus, the question is how to present such results to the user in practice, for example, to support attribute-value selection and object selection queries the user might be interested in. Specifically, in this article we study the problem of maximizing the quality of these selection queries on top of such a probabilistic representation. The quality is measured using the standard and commonly used set-based quality metrics. 
We formalize the problem and then develop efficient approaches that provide high-quality answers for these queries. The comprehensive empirical evaluation over three different domains demonstrates the advantage of our approach over existing techniques.", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Liu:2012:DSR, author = "Ziyang Liu and Yi Chen", title = "Differentiating search results on structured data", journal = j-TODS, volume = "37", number = "1", pages = "4:1--4:??", month = feb, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2109196.2109200", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:17 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Studies show that about 50\% of Web search is for information exploration purposes, where a user would like to investigate, compare, evaluate, and synthesize multiple relevant results. Due to the absence of general tools that can effectively analyze and differentiate multiple results, a user has to manually read and comprehend potential large results in an exploratory search. Such a process is time consuming, labor intensive and error prone. Interestingly, we find that the metadata information embedded in structured data provides a potential for automating or semi-automating the comparison of multiple results. In this article we present an approach for structured data search result differentiation. We define the differentiability of query results and quantify the degree of difference. Then we define the problem of identifying a limited number of valid features in a result that can maximally differentiate this result from the others, which is proved NP-hard. 
We propose two local optimality conditions, namely single-swap and multi-swap, and design efficient algorithms to achieve local optimality. We then present a feature type-based approach, which further improves the quality of the features identified for result differentiation. To show the usefulness of our approach, we implemented a system CompareIt, which can be used to compare structured search results as well as any objects. Our empirical evaluation verifies the effectiveness and efficiency of the proposed approach.", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Yang:2012:SES, author = "Di Yang and Elke A. Rundensteiner and Matthew O. Ward", title = "Shared execution strategy for neighbor-based pattern mining requests over streaming windows", journal = j-TODS, volume = "37", number = "1", pages = "5:1--5:??", month = feb, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2109196.2109201", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:17 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In diverse applications ranging from stock trading to traffic monitoring, data streams are continuously monitored by multiple analysts for extracting patterns of interest in real time. These analysts often submit similar pattern mining requests yet customized with different parameter settings. In this work, we present shared execution strategies for processing a large number of neighbor-based pattern mining requests of the same type yet with arbitrary parameter settings. Such neighbor-based pattern mining requests cover a broad range of popular mining query types, including detection of clusters, outliers, and nearest neighbors. 
Given the high algorithmic complexity of the mining process, serving multiple such queries in a single system is extremely resource intensive. The naive method of detecting and maintaining patterns for different queries independently is often infeasible in practice, as its demands on system resources increase dramatically with the cardinality of the query workload. In order to maximize the efficiency of the system resource utilization for executing multiple queries simultaneously, we analyze the commonalities of the neighbor-based pattern mining queries, and identify several general optimization principles which lead to significant system resource sharing among multiple queries. In particular, as a preliminary sharing effort, we observe that the computation needed for the range query searches (the process of searching the neighbors for each object) can be shared among multiple queries and thus saves the CPU consumption. Then we analyze the interrelations between the patterns identified by queries with different parameter settings, including both pattern-specific and window-specific parameters. For that, we first introduce an incremental pattern representation, which represents the patterns identified by queries with different pattern-specific parameters within a single compact structure. This enables integrated pattern maintenance for multiple queries. Second, by leveraging the potential overlaps among sliding windows, we propose a metaquery strategy which utilizes a single query to answer multiple queries with different window-specific parameters. By combining these three techniques, namely the range query search sharing, integrated pattern maintenance, and metaquery strategy, our framework realizes fully shared execution of multiple queries with arbitrary parameter settings. It achieves significant savings of computational and memory resources due to shared execution.
Our comprehensive experimental study, using real data streams from domains of stock trades and moving object monitoring, demonstrates that our solution is significantly faster than the independent execution strategy, while using only a small portion of memory space compared to the independent execution. We also show that our solution scales in handling large numbers of queries in the order of hundreds or even thousands under high input data rates.", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Re:2012:UCE, author = "Christopher R{\'e} and D. Suciu", title = "Understanding cardinality estimation using entropy maximization", journal = j-TODS, volume = "37", number = "1", pages = "6:1--6:??", month = feb, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2109196.2109202", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:17 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Cardinality estimation is the problem of estimating the number of tuples returned by a query; it is a fundamentally important task in data management, used in query optimization, progress estimation, and resource provisioning. We study cardinality estimation in a principled framework: given a set of statistical assertions about the number of tuples returned by a fixed set of queries, predict the number of tuples returned by a new query. We model this problem using the probability space, over possible worlds, that satisfies all provided statistical assertions and maximizes entropy. We call this the Entropy Maximization model for statistics (MaxEnt). 
In this article we develop the mathematical techniques needed to use the MaxEnt model for predicting the cardinality of conjunctive queries.", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Nuray-Turan:2012:EWQ, author = "Rabia Nuray-Turan and Dmitri V. Kalashnikov and Sharad Mehrotra", title = "Exploiting {Web} querying for {Web} people search", journal = j-TODS, volume = "37", number = "1", pages = "7:1--7:??", month = feb, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2109196.2109203", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:17 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Searching for people on the Web is one of the most common query types submitted to Web search engines today. However, when a person name is queried, the returned Webpages often contain documents related to several distinct namesakes who have the queried name. The task of disambiguating and finding the Webpages related to the specific person of interest is left to the user. Many Web People Search (WePS) approaches have been developed recently that attempt to automate this disambiguation process. Nevertheless, the disambiguation quality of these techniques leaves major room for improvement. In this article, we present a new WePS approach. It is based on issuing additional auxiliary queries to the Web to gain additional knowledge about the Webpages that need to be disambiguated. Thus, the approach uses the Web as an external data source by issuing queries to collect co-occurrence statistics. These statistics are used to assess the overlap of the contextual entities extracted from the Webpages. 
The article also proposes a methodology to make this Web querying technique efficient. Further, the article proposes an approach that is capable of combining various types of disambiguating information, including other common types of similarities, by applying a correlation clustering approach with after-clustering of singleton clusters. These properties allow the framework to get an advantage in terms of result quality over other state-of-the-art WePS techniques.", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Schneider:2012:CDB, author = "Markus Schneider and Tao Chen and Ganesh Viswanathan and Wenjie Yuan", title = "Cardinal directions between complex regions", journal = j-TODS, volume = "37", number = "2", pages = "8:1--8:??", month = may, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2188349.2188350", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:19 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Besides topological relationships and approximate relationships, cardinal directions like north and southwest have turned out to be an important class of qualitative spatial relationships. They are of interdisciplinary interest in fields like cognitive science, robotics, artificial intelligence, and qualitative spatial reasoning. In spatial databases and Geographic Information Systems (GIS) they are frequently used as join and selection criteria in spatial queries. 
However, the available computational models of cardinal directions suffer a number of problems like the use of too coarse approximations of the two spatial operand objects in terms of single representative points or minimum bounding rectangles, the lacking property of converseness of the cardinal directions computed, and the limited applicability to simple instead of complex regions only. This article proposes and formally defines a novel two-phase model, called the Objects Interaction Matrix (OIM) model, that solves these problems, and determines cardinal directions for even complex regions. The model consists of a tiling phase and an interpretation phase. In the tiling phase, a tiling strategy first determines the zones belonging to the nine cardinal directions of each individual region object and then intersects them. The result leads to a bounded grid called objects interaction grid. For each grid cell the information about the region objects that intersect it is stored in an objects interaction matrix. In the subsequent interpretation phase, a well-defined interpretation method is applied to such a matrix and determines the cardinal direction. Spatial example queries illustrate our new cardinal direction concept that is embedded in a spatial extension of SQL and provides user-defined cardinal direction predicates.", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Wijsen:2012:CCQ, author = "Jef Wijsen", title = "Certain conjunctive query answering in first-order logic", journal = j-TODS, volume = "37", number = "2", pages = "9:1--9:??", month = may, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2188349.2188351", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:19 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Primary key violations provide a natural means for modeling uncertainty in the relational data model. A repair (or possible world) of a database is then obtained by selecting a maximal number of tuples without ever selecting two distinct tuples that have the same primary key value. For a Boolean query q, the problem CERTAINTY( q ) takes as input a database db and asks whether q evaluates to true on every repair of db. We are interested in determining queries q for which CERTAINTY( q ) is first-order expressible (and hence in the low complexity class AC ${}^\circ $ ). For queries q in the class of conjunctive queries without self-join, we provide a necessary syntactic condition for first-order expressibility of CERTAINTY( q ). For acyclic queries (in the sense of Beeri et al. [1983]), this necessary condition is also a sufficient condition. So we obtain a decision procedure for first-order expressibility of CERTAINTY( q ) when q is acyclic and without self-join. We also show that if CERTAINTY( q ) is first-order expressible, its first-order definition, commonly called certain first-order rewriting, can be constructed in a rather straightforward way.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Abiteboul:2012:CWS, author = "Serge Abiteboul and Pierre Bourhis and Victor Vianu", title = "Comparing workflow specification languages: a matter of views", journal = j-TODS, volume = "37", number = "2", pages = "10:1--10:??", month = may, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2188349.2188352", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:19 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We address the problem of comparing the expressiveness of workflow specification formalisms using a notion of view of a workflow. Views allow to compare widely different workflow systems by mapping them to a common representation capturing the observables relevant to the comparison. Using this framework, we compare the expressiveness of several workflow specification mechanisms, including automata, temporal constraints, and pre-and postconditions, with XML and relational databases as underlying data models. One surprising result shows the considerable power of static constraints to simulate apparently much richer workflow control mechanisms.", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Tassa:2012:SDC, author = "Tamir Tassa and Ehud Gudes", title = "Secure distributed computation of anonymized views of shared databases", journal = j-TODS, volume = "37", number = "2", pages = "11:1--11:??", month = may, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2188349.2188353", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:19 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We consider the problem of computing efficient anonymizations of partitioned databases. Given a database that is partitioned between several sites, either horizontally or vertically, we devise secure distributed algorithms that allow the different sites to obtain a k -anonymized and l-diverse view of the union of their databases, without disclosing sensitive information. Our algorithms are based on the sequential algorithm [Goldberger and Tassa 2010] that offers anonymizations with utility that is significantly better than other anonymization algorithms, and in particular those that were implemented so far in the distributed setting. Our algorithms can apply to different generalization techniques and utility measures and to any number of sites. While previous distributed algorithms depend on costly cryptographic primitives, the cryptographic assumptions of our solution are surprisingly minimal.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Sheng:2012:EAA, author = "Cheng Sheng and Yufei Tao and Jianzhong Li", title = "Exact and approximate algorithms for the most connected vertex problem", journal = j-TODS, volume = "37", number = "2", pages = "12:1--12:??", month = may, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2188349.2188354", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:19 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "An (edge) hidden graph is a graph whose edges are notexplicitly given. Detecting the presence of an edge requires an expensive edge probing query. We consider the $k$ Most Connected Vertex ($k$-MCV) problem on hidden bipartite graphs. Given a bipartite graph $G$ with independent vertex sets $B$ and $W$, the goal is to find the $k$ vertices in $B$ with the largest degrees using the minimum number of queries. This problem can be regarded as a top-$k$ extension of semi-join, and is encountered in several applications in practice. If $B$ and $W$ have $n$ and $m$ vertices, respectively, the number of queries needed to solve the problem is $ n m $ in the worst case. This, however, is a pessimistic estimate on how many queries are necessary on practical data. In fact, on some inputs, the problem may be settled with only $ k m + n $ queries, which is significantly lower than $ n m $ for $ k \ll n $. The huge difference between $ k m + n $ and $ n m $ makes it interesting to design an adaptive algorithm that is guaranteed to achieve the best possible performance on every input $G$. For $ k \leq n / 2 $, we give an algorithm that is instance optimal among a broad class of solutions. 
This means that, for any $G$, our algorithm can perform more queries than the optimal solution (which is unknown) by only a constant factor, which can be shown at most $2$. As a second step, we study an $ \epsilon $-approximate version of the $k$-MCV problem, where $ \epsilon $ is a parameter satisfying $ 0 < \epsilon < 1 $. The goal is to return $k$ black vertices $ b_1, \ldots, b_k $ such that the degree of $ b_i (i \leq k) $ can be smaller than $ t_i $ by a factor of at most $ \epsilon $, where $ t_i, \ldots, t_k $ (in nonascending order) are the degrees of the $k$ most connected black vertices. We give an efficient randomized algorithm that successfully finds the correct answer with high probability. In particular, for a fixed $ \epsilon $ and a fixed success probability, our algorithm performs $ o(n m) $ queries in expectation for $ t_k = \omega (\log n) $. In other words, whenever $ t_k $ is greater than $ \log n $ by more than a constant, our algorithm beats the $ \Omega (n m) $ lower bound for solving the $k$-MCV problem exactly. All the proposed algorithms, despite the complication of their underlying theory, are simple enough for easy implementation in practice. Extensive experiments have confirmed that their performance in reality agrees with our theoretical findings very well.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Hartmann:2012:IPD, author = "Sven Hartmann and Sebastian Link", title = "The implication problem of data dependencies over {SQL} table definitions: {Axiomatic}, algorithmic and logical characterizations", journal = j-TODS, volume = "37", number = "2", pages = "13:1--13:??", month = may, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2188349.2188355", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:19 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We investigate the implication problem for classes of data dependencies over SQL table definitions. Under Zaniolo's ``no information'' interpretation of null markers we establish an axiomatization and algorithms to decide the implication problem for the combined class of functional and multivalued dependencies in the presence of NOT NULL constraints. The resulting theory subsumes three previously orthogonal frameworks. We further show that the implication problem of this class is equivalent to that in a propositional fragment of Schaerf and Cadoli's [1995] family of para-consistent S-3 logics. In particular, S is the set of variables that correspond to attributes declared NOT NULL. We also show how our equivalences for multivalued dependencies can be extended to Delobel's class of full first-order hierarchical decompositions, and the equivalences for functional dependencies can be extended to arbitrary Boolean dependencies. These dualities allow us to transfer several findings from the propositional fragments to the corresponding classes of data dependencies, and vice versa. 
We show that our results also apply to Codd's null interpretation ``value unknown at present'', but not to Imielinski's [1989] or-relations utilizing Levene and Loizou's weak possible world semantics [Levene and Loizou 1998]. Our findings establish NOT NULL constraints as an effective mechanism to balance not only the certainty in database relations but also the expressiveness with the efficiency of entailment relations. They also control the degree by which the implication of data dependencies over total relations is soundly approximated in SQL table definitions.", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Zhang:2012:SS, author = "Wenjie Zhang and Xuemin Lin and Ying Zhang and Muhammad Aamir Cheema and Qing Zhang", title = "Stochastic skylines", journal = j-TODS, volume = "37", number = "2", pages = "14:1--14:??", month = may, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2188349.2188356", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:19 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In many applications involving multiple criteria optimal decision making, users may often want to make a personal trade-off among all optimal solutions for selecting one object that fits best their personal needs. As a key feature, the skyline in a multidimensional space provides the minimum set of candidates for such purposes by removing all points not preferred by any (monotonic) utility/scoring functions; that is, the skyline removes all objects not preferred by any user no matter how their preferences vary. 
Driven by many recent applications with uncertain data, the probabilistic skyline model is proposed to retrieve uncertain objects based on skyline probabilities. Nevertheless, skyline probabilities cannot capture the preferences of monotonic utility functions. Motivated by this, in this article we propose a novel skyline operator, namely stochastic skylines. In the light of the expected utility principle, stochastic skylines guarantee to provide the minimum set of candidates to optimal solutions over a family of utility functions. We first propose the lskyline operator based on the lower orthant orders. lskyline guarantees to provide the minimum set of candidates to the optimal solutions for the family of monotonic multiplicative utility functions. While lskyline works very effectively for the family of multiplicative functions, it may miss optimal solutions for other utility /scoring functions (e.g., linear functions). To resolve this, we also propose a general stochastic skyline operator, gskyline, based on the usual orders. gskyline provides the minimum candidate set to the optimal solutions for all monotonic functions. For the first time regarding the existing literature, we investigate the complexities of determining a stochastic order between two uncertain objects whose probability distributions are described discretely. We firstly show that determining the lower orthant order is NP-complete with respect to the dimensionality; consequently the problem of computing lskyline is NP-complete. We also show an interesting result as follows. While the usual order involves more complicated geometric forms than the lower orthant order, the usual order may be determined in polynomial time regarding all the inputs, including the dimensionality; this implies that gskyline can be computed in polynomial time. 
A general framework is developed for efficiently and effectively retrieving lskyline and gskyline from a set of uncertain objects, respectively, together with efficient and effective filtering techniques. Novel and efficient verification algorithms are developed to efficiently compute lskyline over multidimensional uncertain data, which run in polynomial time if the dimensionality is fixed, and to efficiently compute gskyline in polynomial time regarding all inputs. We also show, by theoretical analysis and experiments, that the sizes of lskyline and gskyline are both quite similar to that of conventional skyline over certain data. Comprehensive experiments demonstrate that our techniques are efficient and scalable regarding both CPU and IO costs.", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Xia:2012:OSS, author = "Tian Xia and Donghui Zhang and Zheng Fang and Cindy Chen and Jie Wang", title = "Online subspace skyline query processing using the {Compressed SkyCube}", journal = j-TODS, volume = "37", number = "2", pages = "15:1--15:??", month = may, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2188349.2188357", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 1 17:45:19 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The skyline query can help identify the ``best'' objects in a multi-attribute dataset. During the past decade, this query has received considerable attention in the database research community. Most research focused on computing the ``skyline'' of a dataset, or the set of ``skyline objects'' that are not dominated by any other object. 
Such algorithms are not appropriate in an online system, which should respond in real time to skyline query requests with arbitrary subsets of the attributes (also called subspaces). To guarantee real-time response, an online system should precompute the skylines for all subspaces, and look up a skyline upon query. Unfortunately, because the number of subspaces is exponential to the number of attributes, such pre computation has very expensive storage cost and update cost. We propose the Compressed SkyCube (CSC) that is much more compact, yet can still return the skyline of any subspace without consulting the base table. The CSC therefore combines the advantage of precomputation in that it can respond to queries in real time, and the advantage of no-precomputation in that it has efficient space cost and update cost. This article presents the CSC data structures, the CSC query algorithm, the CSC update algorithm, and the CSC initial computation scheme. A solution to extend to high-dimensional data is also proposed.", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ozsoyoglu:2012:Fa, author = "Z. Meral {\"O}zsoyoglu", title = "Foreword", journal = j-TODS, volume = "37", number = "3", pages = "16:1--16:??", month = aug, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2338626.2338627", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Sep 6 09:52:39 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Graefe:2012:FBT, author = "Goetz Graefe and Hideaki Kimura and Harumi Kuno", title = "{Foster} {B}-trees", journal = j-TODS, volume = "37", number = "3", pages = "17:1--17:??", month = aug, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2338626.2338630", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Sep 6 09:52:39 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Foster B-trees are a new variant of B-trees that combines advantages of prior B-tree variants optimized for many-core processors and modern memory hierarchies with flash storage and nonvolatile memory. Specific goals include: (i) minimal concurrency control requirements for the data structure, (ii) efficient migration of nodes to new storage locations, and (iii) support for continuous and comprehensive self-testing. Like B$^{\rm link}$-trees, Foster B-trees optimize latching without imposing restrictions or specific designs on transactional locking, for example, key range locking. Like write-optimized B-trees, and unlike B$^{\rm link}$-trees, Foster B-trees enable large writes on RAID and flash devices as well as wear leveling and efficient defragmentation. Finally, they support continuous and inexpensive yet comprehensive verification of all invariants, including all cross-node invariants of the B-tree structure. An implementation and a performance evaluation show that the Foster B-tree supports high concurrency and high update rates without compromising consistency, correctness, or read performance.", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Wang:2012:RAT, author = "Junhu Wang and Jeffrey Xu Yu", title = "Revisiting answering tree pattern queries using views", journal = j-TODS, volume = "37", number = "3", pages = "18:1--18:??", month = aug, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2338626.2338631", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Sep 6 09:52:39 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We revisit the problem of answering tree pattern queries using views. We first show that, for queries and views that do not have nodes labeled with the wildcard *, there is an approach which does not require us to find any rewritings explicitly, yet which produces the same answers as the maximal contained rewriting. Then, using the new approach, we give simple conditions and a corresponding algorithm for identifying redundant view answers, which are view answers that can be ignored when evaluating the maximal contained rewriting. We also consider redundant view answers in the case where there are multiple views, the relationship between redundant views and redundant view answers, and discuss how to combine the removal of redundant view answers and redundant rewritings. We show that the aforesaid results can be extended to a number of other special cases. Finally, for arbitrary queries and views in P$^{{/, / /, ., []}}$, we provide a method to find the maximal contained rewriting and show how to answer the query using views without explicitly finding the rewritings.", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{David:2012:ERA, author = "Claire David and Leonid Libkin and Tony Tan", title = "Efficient reasoning about data trees via integer linear programming", journal = j-TODS, volume = "37", number = "3", pages = "19:1--19:??", month = aug, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2338626.2338632", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Sep 6 09:52:39 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Data trees provide a standard abstraction of XML documents with data values: they are trees whose nodes, in addition to the usual labels, can carry labels from an infinite alphabet (data). Therefore, one is interested in decidable formalisms for reasoning about data trees. While some are known-such as the two-variable logic-they tend to be of very high complexity, and most decidability proofs are highly nontrivial. We are therefore interested in reasonable complexity formalisms as well as better techniques for proving decidability. Here we show that many decidable formalisms for data trees are subsumed-fully or partially-by the power of tree automata together with set constraints and linear constraints on cardinalities of various sets of data values. All these constraints can be translated into instances of integer linear programming, giving us an NP upper bound on the complexity of the reasoning tasks. We prove that this bound, as well as the key encoding technique, remain very robust, and allow the addition of features such as counting of paths and patterns, and even a concise encoding of constraints, without increasing the complexity. The NP bound is tight, as we also show that the satisfiability of a single set constraint is already NP-hard. 
We then relate our results to several reasoning tasks over XML documents, such as satisfiability of schemas and data dependencies and satisfiability of the two-variable logic. As a final contribution, we describe experimental results based on the implementation of some reasoning tasks using the SMT solver Z3.", acknowledgement = ack-nhfb, articleno = "19", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Lemire:2012:RRB, author = "Daniel Lemire and Owen Kaser and Eduardo Gutarra", title = "Reordering rows for better compression: {Beyond} the lexicographic order", journal = j-TODS, volume = "37", number = "3", pages = "20:1--20:??", month = aug, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2338626.2338633", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Sep 6 09:52:39 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Sorting database tables before compressing them improves the compression rate. Can we do better than the lexicographical order? For minimizing the number of runs in a run-length encoding compression scheme, the best approaches to row-ordering are derived from traveling salesman heuristics, although there is a significant trade-off between running time and compression. A new heuristic, Multiple Lists, which is a variant on Nearest Neighbor that trades off compression for a major running-time speedup, is a good option for very large tables. However, for some compression schemes, it is more important to generate long runs rather than few runs. For this case, another novel heuristic, Vortex, is promising. We find that we can improve run-length encoding up to a factor of 3 whereas we can improve prefix coding by up to 80\%: these gains are on top of the gains due to lexicographically sorting the table. 
We prove that the new row reordering is optimal (within 10\%) at minimizing the runs of identical values within columns, in a few cases.", acknowledgement = ack-nhfb, articleno = "20", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Gupta:2012:EQE, author = "Nitin Gupta and Lucja Kot and Sudip Roy and Gabriel Bender and Johannes Gehrke and Christoph Koch", title = "Entangled queries: {Enabling} declarative data-driven coordination", journal = j-TODS, volume = "37", number = "3", pages = "21:1--21:??", month = aug, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2338626.2338629", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Sep 6 09:52:39 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Many data-driven social and Web applications involve collaboration and coordination. The vision of Declarative Data-Driven Coordination (D3C), proposed in Kot et al. [2010], is to support coordination in the spirit of data management: to make it data-centric and to specify it using convenient declarative languages. This article introduces entangled queries, a language that extends SQL by constraints that allow for the coordinated choice of result tuples across queries originating from different users or applications. It is nontrivial to define a declarative coordination formalism without arriving at the general (NP-complete) Constraint Satisfaction Problem from AI. In this article, we propose an efficiently enforceable syntactic safety condition that we argue is at the sweet spot where interesting declarative power meets applicability in large-scale data management systems and applications. The key computational problem of D3C is to match entangled queries to achieve coordination. 
We present an efficient matching algorithm which statically analyzes query workloads and merges coordinating entangled queries into compound SQL queries. These can be sent to a standard database system and return only coordinated results. We present the overall architecture of an implemented system that contains our evaluation algorithm. We also describe a proof-of-concept Facebook application we have built on top of this system to allow friends to coordinate flight plans. Finally, we evaluate the performance of the matching algorithm experimentally on realistic coordination workloads.", acknowledgement = ack-nhfb, articleno = "21", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Damaggio:2012:ASD, author = "Elio Damaggio and Alin Deutsch and Victor Vianu", title = "Artifact systems with data dependencies and arithmetic", journal = j-TODS, volume = "37", number = "3", pages = "22:1--22:??", month = aug, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2338626.2338628", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Sep 6 09:52:39 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study the static verification problem for data-centric business processes, specified in a variant of IBM's ``business artifact'' model. Artifacts are records of variables that correspond to business-relevant objects and are updated by a set of services equipped with pre- and postconditions, that implement business process tasks. The verification problem consists in statically checking whether all runs of an artifact system satisfy desirable properties expressed in a first-order extension of linear-time temporal logic. Previous work identified the class of guarded artifact systems and properties, for which verification is decidable. 
However, the results suffer an important limitation: they fail in the presence of even very simple data dependencies or arithmetic, both crucial to real-life business processes. In this article, we extend the artifact model and verification results to alleviate this limitation. We identify a practically significant class of business artifacts with data dependencies and arithmetic, for which verification is decidable. The technical machinery needed to establish the results is fundamentally different from previous work. While the worst-case complexity of verification is nonelementary, we identify various realistic restrictions yielding more palatable upper bounds.", acknowledgement = ack-nhfb, articleno = "22", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ozsoyoglu:2012:Fb, author = "Z. Meral {\"O}zsoyoglu", title = "Foreword", journal = j-TODS, volume = "37", number = "4", pages = "23:1--23:??", month = dec, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2389241.2389242", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Dec 20 19:03:29 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "23", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kimelfeld:2012:MCV, author = "Benny Kimelfeld and Jan Vondr{\'a}k and Ryan Williams", title = "Maximizing Conjunctive Views in Deletion Propagation", journal = j-TODS, volume = "37", number = "4", pages = "24:1--24:??", month = dec, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2389241.2389243", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Dec 20 19:03:29 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In deletion propagation, tuples from the database are deleted in order to reflect the deletion of a tuple from the view. Such an operation may result in the (often necessary) deletion of additional tuples from the view, besides the intentionally deleted one. The article studies the complexity of deletion propagation, where the view is defined by a conjunctive query (CQ), and the goal is to maximize the number of tuples that remain in the view. Buneman et al. showed that for some simple CQs, this problem can be solved by a straightforward algorithm, which is called here the unidimensional algorithm. The article identifies additional cases of CQs where the unidimensional algorithm succeeds, and in contrast, shows that for some other CQs the problem is NP-hard to approximate better than some constant ratio. In fact, it is shown here that among the CQs without self joins, the hard CQs are exactly the ones that the unidimensional algorithm fails on. In other words, the following dichotomy result is proved: for every CQ without self joins, deletion propagation is either APX-hard or solvable (in polynomial time) by the unidimensional algorithm. The article then presents approximation algorithms for certain CQs where deletion propagation is APX-hard. 
Specifically, two constant-ratio (and polynomial-time) approximation algorithms are given for the class of sunflower CQs (i.e., CQs having a sunflower hypergraph) without self joins. The first algorithm, providing the approximation ratio $ 1 - 1 / e $, is obtained by formulating the problem at hand as that of maximizing a monotone submodular function subject to a matroid constraint, and then using a known algorithm for such maximization. The second algorithm gives a smaller approximation ratio, $ 1 / 2 $, yet in polynomial time even under combined complexity. Finally, it is shown that self joins can significantly harden approximation in deletion propagation.", acknowledgement = ack-nhfb, articleno = "24", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fan:2012:DCD, author = "Wenfei Fan and Floris Geerts and Jef Wijsen", title = "Determining the Currency of Data", journal = j-TODS, volume = "37", number = "4", pages = "25:1--25:??", month = dec, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2389241.2389244", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Dec 20 19:03:29 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Data in real-life databases become obsolete rapidly. One often finds that multiple values of the same entity reside in a database. While all of these values were once correct, most of them may have become stale and inaccurate. Worse still, the values often do not carry reliable timestamps. With this comes the need for studying data currency, to identify the current value of an entity in a database and to answer queries with the current values, in the absence of reliable timestamps. This article investigates the currency of data. 
(1) We propose a model that specifies partial currency orders in terms of simple constraints. The model also allows us to express what values are copied from other data sources, bearing currency orders in those sources, in terms of copy functions defined on correlated attributes. (2) We study fundamental problems for data currency, to determine whether a specification is consistent, whether a value is more current than another, and whether a query answer is certain no matter how partial currency orders are completed. (3) Moreover, we identify several problems associated with copy functions, to decide whether a copy function imports sufficient current data to answer a query, whether a copy function can be extended to import necessary current data for a query while respecting the constraints, and whether it suffices to copy data of a bounded size. (4) We establish upper and lower bounds of these problems, all matching, for combined complexity and data complexity, and for a variety of query languages. We also identify special cases that warrant lower complexity.", acknowledgement = ack-nhfb, articleno = "25", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Sheng:2012:WCE, author = "Cheng Sheng and Yufei Tao", title = "Worst-Case {I/O}-Efficient Skyline Algorithms", journal = j-TODS, volume = "37", number = "4", pages = "26:1--26:??", month = dec, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2389241.2389245", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Dec 20 19:03:29 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We consider the skyline problem (aka the maxima problem), which has been extensively studied in the database community. The input is a set $P$ of $d$ dimensional points. 
A point dominates another if the coordinate of the former is at most that of the latter on every dimension. The goal is to find the skyline, which is the set of points $ p \in P $ such that $p$ is not dominated by any other point in $P$. The main result of this article is that, for any fixed dimensionality $ d \geq 3 $, in external memory the skyline problem can be settled by performing $ O((N / B) \log_{M / B}^{d - 2}(N / B)) $ I/Os in the worst case, where $N$ is the cardinality of $P$, $B$ the size of a disk block, and $M$ the capacity of main memory. Similar bounds can also be achieved for computing several skyline variants, including the $k$-dominant skyline, $k$-skyband, and $ \alpha $ skyline. Furthermore, the performance can be improved if some dimensions of the data space have small domains. When the dimensionality $d$ is not fixed, the challenge is to outperform the naive algorithm that simply checks all pairs of points in $ P \times P $. We give an algorithm that terminates in $ O((N / B) \log^{d - 2} N) $ I/Os, thus beating the naive solution for any $ d = O(\log N / \log \log N) $.", acknowledgement = ack-nhfb, articleno = "26", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Li:2012:SPS, author = "Boduo Li and Edward Mazur and Yanlei Diao and Andrew McGregor and Prashant Shenoy", title = "{SCALLA}: a Platform for Scalable One-Pass Analytics Using {MapReduce}", journal = j-TODS, volume = "37", number = "4", pages = "27:1--27:??", month = dec, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2389241.2389246", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Dec 20 19:03:29 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Today's one-pass analytics applications tend to be data-intensive in nature and require the ability to process high volumes of data efficiently. MapReduce is a popular programming model for processing large datasets using a cluster of machines. However, the traditional MapReduce model is not well-suited for one-pass analytics, since it is geared towards batch processing and requires the dataset to be fully loaded into the cluster before running analytical queries. This article examines, from a systems standpoint, what architectural design changes are necessary to bring the benefits of the MapReduce model to incremental one-pass analytics. Our empirical and theoretical analyses of Hadoop-based MapReduce systems show that the widely used sort-merge implementation for partitioning and parallel processing poses a fundamental barrier to incremental one-pass analytics, despite various optimizations. To address these limitations, we propose a new data analysis platform that employs hash techniques to enable fast in-memory processing, and a new frequent key based technique to extend such processing to workloads that require a large key-state space. 
Evaluation of our Hadoop-based prototype using real-world workloads shows that our new platform significantly improves the progress of map tasks, allows the reduce progress to keep up with the map progress, with up to 3 orders of magnitude reduction of internal data spills, and enables results to be returned continuously during the job.", acknowledgement = ack-nhfb, articleno = "27", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Li:2012:WHT, author = "Yinan Li and Jignesh M. Patel and Allison Terrell", title = "{WHAM}: a High-Throughput Sequence Alignment Method", journal = j-TODS, volume = "37", number = "4", pages = "28:1--28:??", month = dec, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2389241.2389247", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Dec 20 19:03:29 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Over the last decade, the cost of producing genomic sequences has dropped dramatically due to the current so-called next-generation sequencing methods. However, these next-generation sequencing methods are critically dependent on fast and sophisticated data processing methods for aligning a set of query sequences to a reference genome using rich string matching models. The focus of this work is on the design, development and evaluation of a data processing system for this crucial ``short read alignment'' problem. Our system, called WHAM, employs hash-based indexing methods and bitwise operations for sequence alignments. It allows rich match models and it is significantly faster than the existing state-of-the-art methods. 
In addition, its relative speedup over the existing method is poised to increase in the future in which read sequence lengths will increase.", acknowledgement = ack-nhfb, articleno = "28", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Xie:2012:FAS, author = "Kexin Xie and Ke Deng and Shuo Shang and Xiaofang Zhou and Kai Zheng", title = "Finding Alternative Shortest Paths in Spatial Networks", journal = j-TODS, volume = "37", number = "4", pages = "29:1--29:??", month = dec, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2389241.2389248", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Dec 20 19:03:29 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Shortest path query is one of the most fundamental queries in spatial network databases. There exist algorithms that can process shortest path queries in real time. However, many complex applications require more than just the calculation of a single shortest path. For example, one of the common ways to determine the importance (or price) of a vertex or an edge in spatial network is to use Vickrey pricing, which intuitively values the vertex $v$ (or edge $e$ ) based on how much harder for travelling from the sources to the destinations without using $v$ (or $e$ ). In such cases, the alternative shortest paths without using $v$ (or $e$ ) are required. In this article, we propose using a precomputation based approach for both single pair alternative shortest path and all pairs shortest paths processing. 
To compute the alternative shortest path between a source and a destination efficiently, a na{\"\i}ve way is to precompute and store all alternative shortest paths between every pair of vertices avoiding every possible vertex (or edge), which requires $ O(n^4) $ space. Currently, the state of the art approach for reducing the storage cost is to choose a subset of the vertices as center points, and only store the single-source alternative shortest paths from those center points. Such approach has the space complexity of $ O(n^2 \log n) $. We propose a storage scheme termed iSPQF, which utilizes shortest path quadtrees by observing the relationships between each avoiding vertex and its corresponding alternative shortest paths. We have reduced the space complexity from the na{\"\i}ve $ O(n^4) $ (or the state of the art $ O(n^2 \log n) $) to $ O(\min (\gamma, L) n^{1.5}) $ with comparable query performance of $ O(K) $, where $K$ is the number of vertices in the returned paths, $L$ is the diameter of the spatial network, and $ \gamma $ is a value that depends on the structure of the spatial network, which is empirically estimated to be 40 for real road networks. Experiments on real road networks have shown that the space cost of the proposed iSPQF is scalable, and both the algorithms based on iSPQF are efficient.", acknowledgement = ack-nhfb, articleno = "29", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Amsterdamer:2012:PM, author = "Yael Amsterdamer and Daniel Deutch and Tova Milo and Val Tannen", title = "On Provenance Minimization", journal = j-TODS, volume = "37", number = "4", pages = "30:1--30:??", month = dec, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2389241.2389249", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Dec 20 19:03:29 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Provenance information has been proved to be very effective in capturing the computational process performed by queries, and has been used extensively as the input to many advanced data management tools (e.g., view maintenance, trust assessment, or query answering in probabilistic databases). We observe here that while different (set-)equivalent queries may admit different provenance expressions when evaluated on the same database, there is always some part of these expressions that is common to all. We refer to this part as the core provenance. In addition to being informative, the core provenance is also useful as a compact input to the aforementioned data management tools. We formally define the notion of core provenance. We study algorithms that, given a query, compute an equivalent (called p-minimal) query that for every input database, the provenance of every result tuple is the core provenance. We study such algorithms for queries of varying expressive power (namely conjunctive queries with disequalities and unions thereof). 
Finally, we observe that, in general, one would not want to require database systems to execute a specific p-minimal query, but instead to be able to find, possibly off-line, the core provenance of a given tuple in the output (computed by an arbitrary equivalent query), without reevaluating the query. We provide algorithms for such direct computation of the core provenance.", acknowledgement = ack-nhfb, articleno = "30", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Barcelo:2012:ELP, author = "Pablo Barcel{\'o} and Leonid Libkin and Anthony W. Lin and Peter T. Wood", title = "Expressive Languages for Path Queries over Graph-Structured Data", journal = j-TODS, volume = "37", number = "4", pages = "31:1--31:??", month = dec, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2389241.2389250", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Dec 20 19:03:29 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "For many problems arising in the setting of graph querying (such as finding semantic associations in RDF graphs, exact and approximate pattern matching, sequence alignment, etc.), the power of standard languages such as the widely studied conjunctive regular path queries (CRPQs) is insufficient in at least two ways. First, they cannot output paths and second, more crucially, they cannot express relationships among paths. We thus propose a class of extended CRPQs, called ECRPQs, which add regular relations on tuples of paths, and allow path variables in the heads of queries. We provide several examples of their usefulness in querying graph structured data, and study their properties. We analyze query evaluation and representation of tuples of paths in the output by means of automata. 
We present a detailed analysis of data and combined complexity of queries, and consider restrictions that lower the complexity of ECRPQs to that of relational conjunctive queries. We study the containment problem, and look at further extensions with first-order features, and with nonregular relations that add arithmetic constraints on the lengths of paths and numbers of occurrences of labels.", acknowledgement = ack-nhfb, articleno = "31", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cong:2012:PED, author = "Gao Cong and Wenfei Fan and Anastasios Kementsietsidis and Jianzhong Li and Xianmin Liu", title = "Partial Evaluation for Distributed {XPath} Query Processing and Beyond", journal = j-TODS, volume = "37", number = "4", pages = "32:1--32:??", month = dec, year = "2012", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2389241.2389251", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Dec 20 19:03:29 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article proposes algorithms for evaluating XPath queries over an XML tree that is partitioned horizontally and vertically, and is distributed across a number of sites. The key idea is based on partial evaluation: it is to send the whole query to each site that partially evaluates the query, in parallel, and sends the results as compact (Boolean) functions to a coordinator that combines these to obtain the result. This approach possesses the following performance guarantees. First, each site is visited at most twice for data-selecting XPath queries, and only once for Boolean XPath queries. Second, the network traffic is determined by the answer to the query, rather than the size of the tree. 
Third, the total computation is comparable to that of centralized algorithms on the tree stored in a single site, regardless of how the tree is fragmented and distributed. We also present a MapReduce algorithm for evaluating Boolean XPath queries, based on partial evaluation. In addition, we provide algorithms to evaluate XPath queries on very large XML trees, in a centralized setting. We show both analytically and empirically that our techniques are scalable with large trees and complex XPath queries. These results, we believe, illustrate the usefulness and potential of partial evaluation in distributed systems as well as centralized XML stores for evaluating XPath queries and beyond.", acknowledgement = ack-nhfb, articleno = "32", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Papavasileiou:2013:HLC, author = "Vicky Papavasileiou and Giorgos Flouris and Irini Fundulaki and Dimitris Kotzinos and Vassilis Christophides", title = "High-level change detection in {RDF(S) KBs}", journal = j-TODS, volume = "38", number = "1", pages = "1:1--1:??", month = apr, year = "2013", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Apr 23 18:06:18 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "With the increasing use of Web 2.0 to create, disseminate, and consume large volumes of data, more and more information is published and becomes available for potential data consumers, that is, applications/services, individual users and communities, outside their production site. The most representative example of this trend is Linked Open Data (LOD), a set of interlinked data and knowledge bases. 
The main challenge in this context is data governance within loosely coordinated organizations that are publishing added-value interlinked data on the Web, bringing together issues related to data management and data quality, in order to support the full lifecycle of data production, consumption, and management. In this article, we are interested in curation issues for RDF(S) data, which is the default data model for LOD. In particular, we are addressing change management for RDF(S) data maintained by large communities (scientists, librarians, etc.) which act as curators to ensure high quality of data. Such curated Knowledge Bases (KBs) are constantly evolving for various reasons, such as the inclusion of new experimental evidence or observations, or the correction of erroneous conceptualizations. Managing such changes poses several research problems, including the problem of detecting the changes (delta) between versions of the same KB developed and maintained by different groups of curators, a crucial task for assisting them in understanding the involved changes. This becomes all the more important as curated KBs are interconnected (through copying or referencing) and thus changes need to be propagated from one KB to another either within or across communities. This article addresses this problem by proposing a change language which allows the formulation of concise and intuitive deltas. The language is expressive enough to describe unambiguously any possible change encountered in curated KBs expressed in RDF(S), and can be efficiently and deterministically detected in an automated way. Moreover, we devise a change detection algorithm which is sound and complete with respect to the aforementioned language, and study appropriate semantics for executing the deltas expressed in our language in order to move backwards and forwards in a multiversion repository, using only the corresponding deltas. 
Finally, we evaluate through experiments the effectiveness and efficiency of our algorithms using real ontologies from the cultural, bioinformatics, and entertainment domains.", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jaiswal:2013:SME, author = "Anuj Jaiswal and David J. Miller and Prasenjit Mitra", title = "Schema matching and embedded value mapping for databases with opaque column names and mixed continuous and discrete-valued data fields", journal = j-TODS, volume = "38", number = "1", pages = "2:1--2:??", month = apr, year = "2013", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Apr 23 18:06:18 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Schema matching and value mapping across two information sources, such as databases, are critical information aggregation tasks. Before data can be integrated from multiple tables, the columns and values within the tables must be matched. The complexities of both these problems grow quickly with the number of attributes to be matched and due to multiple semantics of data values. Traditional research has mostly tackled schema matching and value mapping independently, and for categorical (discrete-valued) attributes. We propose novel methods that leverage value mappings to enhance schema matching in the presence of opaque column names for schemas consisting of both continuous and discrete-valued attributes. An additional source of complexity is that a discrete-valued attribute in one schema could in fact be a quantized, encoded version of a continuous-valued attribute in the other schema. 
In our approach, which can tackle both ``onto'' and bijective schema matching, the fitness objective for matching a pair of attributes from two schemas exploits the statistical distribution over values within the two attributes. Suitable fitness objectives are based on Euclidean-distance and the data log-likelihood, both of which are applied in our experimental study. A heuristic local descent optimization strategy that uses two-opt switching to optimize attribute matches, while simultaneously embedding value mappings, is applied for our matching methods. Our experiments show that the proposed techniques matched mixed continuous and discrete-valued attribute schemas with high accuracy and, thus, should be a useful addition to a framework of (semi) automated tools for data alignment.", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Grust:2013:OSQ, author = "Torsten Grust and Jan Rittinger", title = "Observing {SQL} queries in their natural habitat", journal = j-TODS, volume = "38", number = "1", pages = "3:1--3:??", month = apr, year = "2013", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Apr 23 18:06:18 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We describe Habitat, a declarative observational debugger for SQL. Habitat facilitates true language-level (not: plan-level) debugging of, probably flawed, SQL queries that yield unexpected results. Users mark SQL subexpressions of arbitrary size and then observe whether these evaluate as expected. Habitat understands query nesting and free row variables in correlated subqueries, and generally aims to not constrain users while suspect subexpressions are marked for observation. 
From the marked SQL text, Habitat's algebraic compiler derives a new query whose result represents the values of the desired observations. These observations are generated by the target SQL database host itself and are derived from the original data: Habitat does not require prior data extraction or extra debugging middleware. Experiments with TPC-H database instances indicate that observations impose a runtime overhead sufficiently low to allow for interactive debugging sessions.", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Benzaken:2013:OXQ, author = "V{\'e}ronique Benzaken and Giuseppe Castagna and Dario Colazzo and Kim Nguy{\'{\^e}}n", title = "Optimizing {XML} querying using type-based document projection", journal = j-TODS, volume = "38", number = "1", pages = "4:1--4:??", month = apr, year = "2013", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Apr 23 18:06:18 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "XML data projection (or pruning) is a natural optimization for main memory query engines: given a query Q over a document D, the subtrees of D that are not necessary to evaluate Q are pruned, thus producing a smaller document D'; the query Q is then executed on D', hence avoiding to allocate and process nodes that will never be reached by Q. In this article, we propose a new approach, based on types, that greatly improves current solutions. Besides providing comparable or greater precision and far lesser pruning overhead, our solution-unlike current approaches-takes into account backward axes, predicates, and can be applied to multiple queries rather than just to single ones. 
A side contribution is a new type system for XPath able to handle backward axes. The soundness of our approach is formally proved. Furthermore, we prove that the approach is also complete (i.e., yields the best possible type-driven pruning) for a relevant class of queries and Schemas. We further validate our approach using the XMark and XPathMark benchmarks and show that pruning not only improves the main memory query engine's performances (as expected) but also those of state of the art native XML databases.", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Das:2013:EES, author = "Sudipto Das and Divyakant Agrawal and Amr {El Abbadi}", title = "{ElasTraS}: an elastic, scalable, and self-managing transactional database for the cloud", journal = j-TODS, volume = "38", number = "1", pages = "5:1--5:??", month = apr, year = "2013", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Apr 23 18:06:18 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A database management system (DBMS) serving a cloud platform must handle large numbers of application databases (or tenants) that are characterized by diverse schemas, varying footprints, and unpredictable load patterns. Scaling out using clusters of commodity servers and sharing resources among tenants (i.e., multitenancy) are important features of such systems. Moreover, when deployed on a pay-per-use infrastructure, minimizing the system's operating cost while ensuring good performance is also an important goal. Traditional DBMSs were not designed for such scenarios and hence do not possess the mentioned features critical for DBMSs in the cloud. 
We present ElasTraS, which combines three design principles to build an elastically-scalable multitenant DBMS for transaction processing workloads. These design principles are gleaned from a careful analysis of the years of research in building scalable key-value stores and decades of research in high performance transaction processing systems. ElasTraS scales to thousands of tenants, effectively consolidates tenants with small footprints while scaling-out large tenants across multiple servers in a cluster. ElasTraS also supports low-latency multistep ACID transactions, is fault-tolerant, self-managing, and highly available to support mission critical applications. ElasTraS leverages Albatross, a low overhead on-demand live database migration technique, for elastic load balancing by adding more servers during high load and consolidating to fewer servers during usage troughs. This elastic scaling minimizes the operating cost and ensures good performance even in the presence of unpredictable changes to the workload. We elucidate the design principles, explain the architecture, describe a prototype implementation, present the detailed design and implementation of Albatross, and experimentally evaluate the implementation using a variety of transaction processing workloads. On a cluster of 20 commodity servers, our prototype serves thousands of tenants and serves more than 1 billion transactions per day while migrating tenant databases with minimal overhead to allow lightweight elastic scaling. Using a cluster of 30 commodity servers, ElasTraS can scale-out a terabyte TPC-C database serving an aggregate throughput of approximately one quarter of a million TPC-C transactions per minute.", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Franceschet:2013:GTA, author = "Massimo Franceschet and Donatella Gubiani and Angelo Montanari and Carla Piazza", title = "A graph-theoretic approach to map conceptual designs to {XML} schemas", journal = j-TODS, volume = "38", number = "1", pages = "6:1--6:??", month = apr, year = "2013", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Apr 23 18:06:18 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We propose a mapping from a database conceptual design to a schema for XML that produces highly connected and nested XML structures. We first introduce two alternative definitions of the mapping, one modeling entities as global XML elements and expressing relationships among them in terms of keys and key references (flat design), the other one encoding relationships by properly including the elements for some entities into the elements for other entities (nest design). Then we provide a benchmark evaluation of the two solutions showing that the nest approach, compared to the flat one, leads to improvements in both query and validation performances. This motivates us to systematically investigate the best way to nest XML structures. We identify two different nesting solutions: a maximum depth nesting, that keeps low the number of costly join operations that are necessary to reconstruct information at query time using the mapped schema, and a maximum density nesting, that minimizes the number of schema constraints used in the mapping of the conceptual schema, thus reducing the validation overhead. On the one hand, the problem of finding a maximum depth nesting turns out to be NP-complete and, moreover, it admits no constant ratio approximation algorithm. 
On the other hand, we devise a graph-theoretic algorithm, NiduX, that solves the maximum density problem in linear time. Interestingly, NiduX finds the optimal solution for the harder maximum depth problem whenever the conceptual design graph is either acyclic or complete. In randomly generated intermediate cases of the graph topology, we experimentally show that NiduX finds a good approximation of the optimal solution.", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Wu:2013:MSK, author = "Dingming Wu and Man Lung Yiu and Christian S. Jensen", title = "Moving spatial keyword queries: Formulation, methods, and analysis", journal = j-TODS, volume = "38", number = "1", pages = "7:1--7:??", month = apr, year = "2013", CODEN = "ATDSD3", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Apr 23 18:06:18 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Web users and content are increasingly being geo-positioned. This development gives prominence to spatial keyword queries, which involve both the locations and textual descriptions of content. We study the efficient processing of continuously moving top-$k$ spatial keyword (M$k$SK) queries over spatial text data. State-of-the-art solutions for moving queries employ safe zones that guarantee the validity of reported results as long as the user remains within the safe zone associated with a result. However, existing safe-zone methods focus solely on spatial locations and ignore text relevancy. We propose two algorithms for computing safe zones that guarantee correct results at any time and that aim to optimize the server-side computation as well as the communication between the server and the client.
We exploit tight and conservative approximations of safe zones and aggressive computational space pruning. We present techniques that aim to compute the next safe zone efficiently, and we present two types of conservative safe zones that aim to reduce the communication cost. Empirical studies with real data suggest that the proposals are efficient. To understand the effectiveness of the proposed safe zones, we study analytically the expected area of a safe zone, which indicates on average for how long a safe zone remains valid, and we study the expected number of influence objects needed to define a safe zone, which gives an estimate of the average communication cost. The analytical modeling is validated through empirical studies.", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Sadoghi:2013:AOB, author = "Mohammad Sadoghi and Hans-Arno Jacobsen", title = "Analysis and optimization for {Boolean} expression indexing", journal = j-TODS, volume = "38", number = "2", pages = "8:1--8:??", month = jun, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2487259.2487260", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jul 1 18:44:25 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "BE-Tree is a novel dynamic data structure designed to efficiently index Boolean expressions over a high-dimensional discrete space. BE-Tree copes with both high-dimensionality and expressiveness of Boolean expressions by introducing an effective two-phase space-cutting technique that specifically utilizes the discrete and finite domain properties of the space. Furthermore, BE-Tree employs self-adjustment policies to dynamically adapt the tree as the workload changes.
Moreover, in BE-Tree, we develop two novel cache-conscious predicate evaluation techniques, namely, lazy and bitmap evaluations, that also exploit the underlying discrete and finite space to substantially reduce BE-Tree's matching time by up to 75\%. BE-Tree is a general index structure for matching Boolean expressions which has a wide range of applications including (complex) event processing, publish/subscribe matching, emerging applications in cospaces, profile matching for targeted web advertising, and approximate string matching. Finally, the superiority of BE-Tree is proven through a comprehensive evaluation with state-of-the-art index structures designed for matching Boolean expressions.", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Li:2013:PBM, author = "Guoliang Li and Dong Deng and Jianhua Feng", title = "A partition-based method for string similarity joins with edit-distance constraints", journal = j-TODS, volume = "38", number = "2", pages = "9:1--9:??", month = jun, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2487259.2487261", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jul 1 18:44:25 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "As an essential operation in data cleaning, the similarity join has attracted considerable attention from the database community. In this article, we study string similarity joins with edit-distance constraints, which find similar string pairs from two large sets of strings whose edit distance is within a given threshold. Existing algorithms are efficient either for short strings or for long strings, and there is no algorithm that can efficiently and adaptively support both short strings and long strings.
To address this problem, we propose a new filter, called the segment filter. We partition a string into a set of segments and use the segments as a filter to find similar string pairs. We first create inverted indices for the segments. Then for each string, we select some of its substrings, identify the selected substrings from the inverted indices, and take strings on the inverted lists of the found substrings as candidates of this string. Finally, we verify the candidates to generate the final answer. We devise efficient techniques to select substrings and prove that our method can minimize the number of selected substrings. We develop novel pruning techniques to efficiently verify the candidates. We also extend our techniques to support normalized edit distance. Experimental results show that our algorithms are efficient for both short strings and long strings, and outperform state-of-the-art methods on real-world datasets.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Catallo:2013:TKD, author = "Ilio Catallo and Eleonora Ciceri and Piero Fraternali and Davide Martinenghi and Marco Tagliasacchi", title = "Top-$k$ diversity queries over bounded regions", journal = j-TODS, volume = "38", number = "2", pages = "10:1--10:??", month = jun, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2487259.2487262", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jul 1 18:44:25 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Top-k diversity queries over objects embedded in a low-dimensional vector space aim to retrieve the best k objects that are both relevant to given user's criteria and well distributed over a designated region. 
An interesting case is provided by spatial Web objects, which are produced in great quantity by location-based services that let users attach content to places and are found also in domains like trip planning, news analysis, and real estate. In this article we present a technique for addressing such queries that, unlike existing methods for diversified top-$k$ queries, does not require accessing and scanning all relevant objects in order to find the best k results. Our Space Partitioning and Probing (SPP) algorithm works by progressively exploring the vector space, while keeping track of the already seen objects and of their relevance and position. The goal is to provide a good quality result set in terms of both relevance and diversity. We assess quality by using as a baseline the result set computed by MMR, one of the most popular diversification algorithms, while minimizing the number of accessed objects. In order to do so, SPP exploits score-based and distance-based access methods, which are available, for instance, in most geo-referenced Web data sources. Experiments with both synthetic and real data show that SPP produces results that are relevant and spatially well distributed, while significantly reducing the number of accessed objects and incurring a very low computational overhead.", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fazzinga:2013:RDC, author = "Bettina Fazzinga and Sergio Flesca and Filippo Furfaro and Elio Masciari", title = "{RFID}-data compression for supporting aggregate queries", journal = j-TODS, volume = "38", number = "2", pages = "11:1--11:??", month = jun, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2487259.2487263", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jul 1 18:44:25 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "RFID-based systems for object tracking and supply chain management have been emerging since the RFID technology proved effective in monitoring movements of objects. The monitoring activity typically results in huge numbers of readings, thus making the problem of efficiently retrieving aggregate information from the collected data a challenging issue. In fact, tackling this problem is of crucial importance, as fast answers to aggregate queries are often mandatory to support the decision making process. In this regard, a compression technique for RFID data is proposed, and used as the core of a system supporting the efficient estimation of aggregate queries. Specifically, this technique aims at constructing a lossy synopsis of the data over which aggregate queries can be estimated, without accessing the original data. Owing to the lossy nature of the compression, query estimates are approximate, and are returned along with intervals that are guaranteed to contain the exact query answers. The effectiveness of the proposed approach has been experimentally validated, showing a remarkable trade-off between the efficiency and the accuracy of the query estimation.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Pavlou:2013:GDF, author = "Kyriacos E. Pavlou and Richard T. Snodgrass", title = "Generalizing database forensics", journal = j-TODS, volume = "38", number = "2", pages = "12:1--12:??", month = jun, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2487259.2487264", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jul 1 18:44:25 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In this article we present refinements on previously proposed approaches to forensic analysis of database tampering. We significantly generalize the basic structure of these algorithms to admit new characterizations of the ``where'' axis of the corruption diagram. Specifically, we introduce page-based partitioning as well as attribute-based partitioning along with their associated corruption diagrams. We compare the structure of all the forensic analysis algorithms and discuss the various design choices available with respect to forensic analysis. We characterize the forensic cost of the newly introduced algorithms, compare their forensic cost, and give our recommendations. We then introduce a comprehensive taxonomy of the types of possible corruption events, along with an associated forensic analysis protocol that consolidates all extant forensic algorithms and the corresponding type(s) of corruption events they detect. The result is a generalization of these algorithms and an overarching characterization of the process of database forensic analysis, thus providing a context within the overall operation of a DBMS for all existing forensic analysis algorithms.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cheney:2013:RFN, author = "James Cheney", title = "Revisiting ``forward node-selecting queries over trees''", journal = j-TODS, volume = "38", number = "2", pages = "13:1--13:??", month = jun, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2487259.2487265", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jul 1 18:44:25 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In ``Forward Node-Selecting Queries over Trees,'' Olteanu [2007] gives three rewriting systems for eliminating reverse XPath axis steps from node-selecting queries over trees, together with arguments for their correctness and termination for a large class of input graphs, including cyclic ones. These proofs are valid for tree or acyclic formulas, but two of the rewrite systems ( TRS$_2$ and TRS$_3$ ) do not terminate on cyclic graphs; that is, there are infinite rewrite sequences that never yield a normal form. We investigate the reasons why the termination arguments do not work for general cyclic formulas, and develop alternative algorithms that can be used instead. We prove that TRS$_2$ is weakly normalizing, while TRS$_3$ is not weakly normalizing, but it can be extended to a weakly normalizing system TRS$_3^\ocirc $. The algorithms and proof techniques illustrate unforeseen subtleties in the handling of cyclic queries.", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bonifati:2013:AIM, author = "Angela Bonifati and Martin Goodfellow and Ioana Manolescu and Domenica Sileo", title = "Algebraic incremental maintenance of {XML} views", journal = j-TODS, volume = "38", number = "3", pages = "14:1--14:??", month = aug, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2508020.2508021", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 30 16:33:21 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Materialized views can bring important performance benefits when querying XML documents. In the presence of XML document changes, materialized views need to be updated to faithfully reflect the changed document. In this work, we present an algebraic approach for propagating source updates to XML materialized views expressed in a powerful XML tree pattern formalism. Our approach differs from the state-of-the-art in the area in two important ways. First, it relies on set-oriented, algebraic operations, to be contrasted with node-based previous approaches. Second, it exploits state-of-the-art features of XML stores and XML query evaluation engines, notably XML structural identifiers and associated structural join algorithms. We present algorithms for determining how updates should be propagated to views, and highlight the benefits of our approach over existing algorithms through a series of experiments.", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Colazzo:2013:ALI, author = "Dario Colazzo and Giorgio Ghelli and Luca Pardini and Carlo Sartiani", title = "Almost-linear inclusion for {XML} regular expression types", journal = j-TODS, volume = "38", number = "3", pages = "15:1--15:??", month = aug, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2508020.2508022", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 30 16:33:21 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Type inclusion is a fundamental operation in every type-checking compiler, but it is quite expensive for XML manipulation languages. A polynomial inclusion checking algorithm for an expressive family of XML type languages is known, but it runs in quadratic time both in the best and in the worst cases. We present here an algorithm that has a linear-time backbone, and resorts to the quadratic approach for some specific parts of the compared types. Our experiments show that the new algorithm is much faster than the quadratic one, and that it typically runs in linear time, hence it can be used as a building block for a practical type-checking compiler.", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Qin:2013:ASS, author = "Jianbin Qin and Wei Wang and Chuan Xiao and Yifei Lu and Xuemin Lin and Haixun Wang", title = "Asymmetric signature schemes for efficient exact edit similarity query processing", journal = j-TODS, volume = "38", number = "3", pages = "16:1--16:??", month = aug, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2508020.2508023", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 30 16:33:21 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Given a query string Q, an edit similarity search finds all strings in a database whose edit distance with {$Q$} is no more than a given threshold $ \tau $. Most existing methods answering edit similarity queries employ schemes to generate string subsequences as signatures and generate candidates by set overlap queries on query and data signatures. In this article, we show that for any such signature scheme, the lower bound of the minimum number of signatures is $ \tau + 1 $, which is lower than what is achieved by existing methods. We then propose several asymmetric signature schemes, that is, extracting different numbers of signatures for the data and query strings, which achieve this lower bound. A basic asymmetric scheme is first established on the basis of matching $q$-chunks and $q$-grams between two strings. Two efficient query processing algorithms (IndexGram and IndexChunk) are developed on top of this scheme. We also propose novel candidate pruning methods to further improve the efficiency. 
We then generalize the basic scheme by incorporating novel ideas of floating $q$-chunks, optimal selection of $q$-chunks, and reducing the number of signatures using global ordering. As a result, the Super and Turbo families of schemes are developed together with their corresponding query processing algorithms. We have conducted a comprehensive experimental study using the six asymmetric algorithms and nine previous state-of-the-art algorithms. The experiment results clearly showcase the efficiency of our methods and demonstrate space and time characteristics of our proposed algorithms.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Levandoski:2013:FEP, author = "Justin J. Levandoski and Ahmed Eldawy and Mohamed F. Mokbel and Mohamed E. Khalefa", title = "Flexible and extensible preference evaluation in database systems", journal = j-TODS, volume = "38", number = "3", pages = "17:1--17:??", month = aug, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2493268", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 30 16:33:21 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Personalized database systems give users answers tailored to their personal preferences. While numerous preference evaluation methods for databases have been proposed (e.g., skyline, top-k, k-dominance, k-frequency), the implementation of these methods at the core of a database system is a double-edged sword. Core implementation provides efficient query processing for arbitrary database queries, however, this approach is not practical since each existing (and future) preference method requires implementation within the database engine. 
To solve this problem, this article introduces FlexPref, a framework for extensible preference evaluation in database systems. FlexPref, implemented in the query processor, aims to support a wide array of preference evaluation methods in a single extensible code base. Integration with FlexPref is simple, involving the registration of only three functions that capture the essence of the preference method. Once integrated, the preference method ``lives'' at the core of the database, enabling the efficient execution of preference queries involving common database operations. This article also provides a query optimization framework for FlexPref, as well as a theoretical framework that defines the properties a preference method must exhibit to be implemented in FlexPref. To demonstrate the extensibility of FlexPref, this article also provides case studies detailing the implementation of seven state-of-the-art preference evaluation methods within FlexPref. We also experimentally study the strengths and weaknesses of an implementation of FlexPref in PostgreSQL over a range of single-table and multitable preference queries.", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fan:2013:IGP, author = "Wenfei Fan and Xin Wang and Yinghui Wu", title = "Incremental graph pattern matching", journal = j-TODS, volume = "38", number = "3", pages = "18:1--18:??", month = aug, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2489791", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 30 16:33:21 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Graph pattern matching is commonly used in a variety of emerging applications such as social network analysis. These applications highlight the need for studying the following two issues. First, graph pattern matching is traditionally defined in terms of subgraph isomorphism or graph simulation. These notions, however, often impose too strong a topological constraint on graphs to identify meaningful matches. Second, in practice a graph is typically large, and is frequently updated with small changes. It is often prohibitively expensive to recompute matches starting from scratch via batch algorithms when the graph is updated. This article studies these two issues. (1) We propose to define graph pattern matching based on a notion of bounded simulation, which extends graph simulation by specifying the connectivity of nodes in a graph within a predefined number of hops. We show that bounded simulation is able to find sensible matches that the traditional matching notions fail to catch. We also show that matching via bounded simulation is in cubic time, by giving such an algorithm. (2) We provide an account of results on incremental graph pattern matching, for matching defined with graph simulation, bounded simulation, and subgraph isomorphism. 
We show that the incremental matching problem is unbounded, that is, its cost is not determined alone by the size of the changes in the input and output, for all these matching notions. Nonetheless, when matching is defined in terms of simulation or bounded simulation, incremental matching is semibounded, that is, its worst-time complexity is bounded by a polynomial in the size of the changes in the input, output, and auxiliary information that is necessarily maintained to reuse previous computation, and the size of graph patterns. We also develop incremental matching algorithms for graph simulation and bounded simulation, by minimizing unnecessary recomputation. In contrast, matching based on subgraph isomorphism is neither bounded nor semibounded. (3) We experimentally verify the effectiveness and efficiency of these algorithms, and show that: (a) the revised notion of graph pattern matching allows us to identify communities commonly found in real-life networks, and (b) the incremental algorithms substantially outperform their batch counterparts in response to small changes. These suggest a promising framework for real-life graph pattern matching.", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Karvounarakis:2013:CDS, author = "Grigoris Karvounarakis and Todd J. Green and Zachary G. Ives and Val Tannen", title = "Collaborative data sharing via update exchange and provenance", journal = j-TODS, volume = "38", number = "3", pages = "19:1--19:??", month = aug, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2500127", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 30 16:33:21 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Recent work [Ives et al. 
2005] proposed a new class of systems for supporting data sharing among scientific and other collaborations: this new collaborative data sharing system connects heterogeneous logical peers using a network of schema mappings. Each peer has a locally controlled and edited database instance, but wants to incorporate related data from other peers as well. To achieve this, every peer's data and updates propagate along the mappings to the other peers. However, this operation, termed update exchange, is filtered by trust conditions---expressing what data and sources a peer judges to be authoritative---which may cause a peer to reject another's updates. In order to support such filtering, updates carry provenance information. This article develops methods for realizing such systems: we build upon techniques from data integration, data exchange, incremental view maintenance, and view update to propagate updates along mappings, both to derived and optionally to source instances. We incorporate a novel model for tracking data provenance, such that curators may filter updates based on trust conditions over this provenance. We implement our techniques in a layer above an off-the-shelf RDBMS, and we experimentally demonstrate the viability of these techniques in the Orchestra prototype system.", acknowledgement = ack-nhfb, articleno = "19", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ozsoyoglu:2013:FIP, author = "Z.
Meral {\"O}zsoyoglu", title = "Foreword to invited papers issue", journal = j-TODS, volume = "38", number = "4", pages = "20:1--20:??", month = nov, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2539032.2539033", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Dec 9 11:35:10 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "20", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Mozafari:2013:HPC, author = "Barzan Mozafari and Kai Zeng and Loris D'antoni and Carlo Zaniolo", title = "High-performance complex event processing over hierarchical data", journal = j-TODS, volume = "38", number = "4", pages = "21:1--21:??", month = nov, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2536779", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Dec 9 11:35:10 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "While Complex Event Processing (CEP) constitutes a considerable portion of the so-called Big Data analytics, current CEP systems can only process data having a simple structure, and are otherwise limited in their ability to efficiently support complex continuous queries on structured or semistructured information. However, XML-like streams represent a very popular form of data exchange, comprising large portions of social network and RSS feeds, financial feeds, configuration files, and similar applications requiring advanced CEP queries. In this article, we present the XSeq language and system that support CEP on XML streams, via an extension of XPath that is both powerful and amenable to an efficient implementation. 
Specifically, the XSeq language extends XPath with natural operators to express sequential and Kleene-* patterns over XML streams, while remaining highly amenable to efficient execution. In fact, XSeq is designed to take full advantage of the recently proposed Visibly Pushdown Automata (VPA), where higher expressive power can be achieved without compromising the computationally attractive properties of finite state automata. Besides the efficiency and expressivity benefits, the choice of VPA as the underlying model also enables XSeq to go beyond XML streams and be easily applicable to any data with both sequential and hierarchical structures, including JSON messages, RNA sequences, and software traces. Therefore, we illustrate the XSeq's power for CEP applications through examples from different domains and provide formal results on its expressiveness and complexity. Finally, we present several optimization techniques for XSeq queries. Our extensive experiments indicate that XSeq brings outstanding performance to CEP applications: two orders of magnitude improvement is obtained over the same queries executed in general-purpose XML engines.", acknowledgement = ack-nhfb, articleno = "21", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Sarma:2013:CTL, author = "Anish Das Sarma and Hongrae Lee and Hector Gonzalez and Jayant Madhavan and Alon Halevy", title = "Consistent thinning of large geographical data for map visualization", journal = j-TODS, volume = "38", number = "4", pages = "22:1--22:??", month = nov, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2539032.2539034", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Dec 9 11:35:10 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Large-scale map visualization systems play an increasingly important role in presenting geographic datasets to end-users. Since these datasets can be extremely large, a map rendering system often needs to select a small fraction of the data to visualize them in a limited space. This article addresses the fundamental challenge of thinning: determining appropriate samples of data to be shown on specific geographical regions and zoom levels. Other than the sheer scale of the data, the thinning problem is challenging because of a number of other reasons: (1) data can consist of complex geographical shapes, (2) rendering of data needs to satisfy certain constraints, such as data being preserved across zoom levels and adjacent regions, and (3) after satisfying the constraints, an optimal solution needs to be chosen based on objectives such as maximality, fairness, and importance of data. This article formally defines and presents a complete solution to the thinning problem. First, we express the problem as an integer programming formulation that efficiently solves thinning for desired objectives. Second, we present more efficient solutions for maximality, based on DFS traversal of a spatial tree. 
Third, we consider the common special case of point datasets, and present an even more efficient randomized algorithm. Fourth, we show that contiguous regions are tractable for a general version of maximality for which arbitrary regions are intractable. Fifth, we examine the structure of our integer programming formulation and show that for point datasets, our program is integral. Finally, we have implemented all techniques from this article in Google Maps [Google 2005] visualizations of fusion tables [Gonzalez et al. 2010], and we describe a set of experiments that demonstrate the trade-offs among the algorithms.", acknowledgement = ack-nhfb, articleno = "22", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Teubner:2013:XFB, author = "Jens Teubner and Louis Woods and Chongling Nie", title = "{XLynx} --- An {FPGA}-based {XML} filter for hybrid {XQuery} processing", journal = j-TODS, volume = "38", number = "4", pages = "23:1--23:??", month = nov, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2536800", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Dec 9 11:35:10 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "While offering unique performance and energy-saving advantages, the use of Field-Programmable Gate Arrays (FPGAs) for database acceleration has demanded major concessions from system designers. Either the programmable chips have been used for very basic application tasks (such as implementing a rigid class of selection predicates) or their circuit definition had to be completely recompiled at runtime-a very CPU-intensive and time-consuming effort. This work eliminates the need for such concessions. 
As part of our XLynx implementation --- an FPGA-based XML filter --- we present skeleton automata
However, they differ from standard regular expressions in several notable aspects. For example, they have a limited form of negation, they have numerical occurrence indicators as syntactic sugar, and their semantics on graphs is defined in a nonstandard manner. We formalize the W3C semantics of property paths and investigate various query evaluation problems on graphs. More specifically, let $x$ and $y$ be two nodes in an edge-labeled graph and $r$ be an expression. We study the complexities of: (1) deciding whether there exists a path from $x$ to $y$ that matches $r$ and (2) counting how many paths from $x$ to $y$ match $r$. Our main results show that, compared to an alternative semantics of regular expressions on graphs, the complexity of (1) and (2) under W3C semantics is significantly higher. Whereas the alternative semantics remains in polynomial time for large fragments of expressions, the W3C semantics makes problems (1) and (2) intractable almost immediately. As a side-result, we prove that the membership problem for regular expressions with numerical occurrence indicators and negation is in polynomial time.", acknowledgement = ack-nhfb, articleno = "24", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Letelier:2013:SAO, author = "Andr{\'e}s Letelier and Jorge P{\'e}rez and Reinhard Pichler and Sebastian Skritek", title = "Static analysis and optimization of {Semantic Web} queries", journal = j-TODS, volume = "38", number = "4", pages = "25:1--25:??", month = nov, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2500130", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Dec 9 11:35:10 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Static analysis is a fundamental task in query optimization. 
In this article we study static analysis and optimization techniques for SPARQL, which is the standard language for querying Semantic Web data. Of particular interest for us is the optionality feature in SPARQL. It is crucial in Semantic Web data management, where data sources are inherently incomplete and the user is usually interested in partial answers to queries. This feature is one of the most complicated constructors in SPARQL and also the one that makes this language depart from classical query languages such as relational conjunctive queries. We focus on the class of well-designed SPARQL queries, which has been proposed in the literature as a fragment of the language with good properties regarding query evaluation. We first propose a tree representation for SPARQL queries, called pattern trees, which captures the class of well-designed SPARQL graph patterns. Among other results, we propose several rules that can be used to transform pattern trees into a simple normal form, and study equivalence and containment. We also study the evaluation and enumeration problems for this class of queries.", acknowledgement = ack-nhfb, articleno = "25", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Agarwal:2013:MS, author = "Pankaj K. Agarwal and Graham Cormode and Zengfeng Huang and Jeff M. Phillips and Zhewei Wei and Ke Yi", title = "Mergeable summaries", journal = j-TODS, volume = "38", number = "4", pages = "26:1--26:??", month = nov, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2500128", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Dec 9 11:35:10 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study the mergeability of data summaries. 
Informally speaking, mergeability requires that, given two summaries on two datasets, there is a way to merge the two summaries into a single summary on the two datasets combined together, while preserving the error and size guarantees. This property means that the summaries can be merged in a way akin to other algebraic operators such as sum and max, which is especially useful for computing summaries on massive distributed data. Several data summaries are trivially mergeable by construction, most notably all the sketches that are linear functions of the datasets. But some other fundamental ones, like those for heavy hitters and quantiles, are not (known to be) mergeable. In this article, we demonstrate that these summaries are indeed mergeable or can be made mergeable after appropriate modifications. Specifically, we show that for $ \epsilon $-approximate heavy hitters, there is a deterministic mergeable summary of size $ O (1 / \epsilon) $; for $ \epsilon $-approximate quantiles, there is a deterministic summary of size $ O((1 / \epsilon) \log (\epsilon n)) $ that has a restricted form of mergeability, and a randomized one of size $ O((1 / \epsilon) \log^{3 / 2} (1 / \epsilon)) $ with full mergeability. We also extend our results to geometric summaries such as \epsilon -approximations which permit approximate multidimensional range counting queries. While most of the results in this article are theoretical in nature, some of the algorithms are actually very simple and even perform better than the previously best known algorithms, which we demonstrate through experiments in a simulated sensor network. 
We also achieve two results of independent interest: (1) we provide the best known randomized streaming bound for $ \epsilon $-approximate quantiles that depends only on $ \epsilon $, of size $ O((1 / \epsilon) \log^{3 / 2} (1 / \epsilon)) $, and (2) we demonstrate that the MG and the SpaceSaving summaries for heavy hitters are isomorphic.", acknowledgement = ack-nhfb, articleno = "26", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Konrad:2013:VXD, author = "Christian Konrad and Fr{\'e}d{\'e}ric Magniez", title = "Validating {XML} documents in the streaming model with external memory", journal = j-TODS, volume = "38", number = "4", pages = "27:1--27:??", month = nov, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2504590", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Dec 9 11:35:10 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study the problem of validating XML documents of size N against general DTDs in the context of streaming algorithms. The starting point of this work is a well-known space lower bound. There are XML documents and DTDs for which p -pass streaming algorithms require $ \Omega (N / p) $ space. We show that when allowing access to external memory, there is a deterministic streaming algorithm that solves this problem with memory space $ O(\log^2 N) $, a constant number of auxiliary read/write streams, and $ O(\log N) $ total number of passes on the XML document and auxiliary streams. An important intermediate step of this algorithm is the computation of the First-Child-Next-Sibling (FCNS) encoding of the initial XML document in a streaming fashion. 
We study this problem independently, and we also provide memory-efficient streaming algorithms for decoding an XML document given in its FCNS encoding. Furthermore, validating XML documents encoding binary trees against any DTD in the usual streaming model without external memory can be done with sublinear memory. There is a one-pass algorithm using $ O(\sqrt N \log N) $ space, and a bidirectional two-pass algorithm using $ O(\log^2 N) $ space which perform this task.", acknowledgement = ack-nhfb, articleno = "27", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cate:2013:LSM, author = "Balder {Ten Cate} and V{\'\i}ctor Dalmau and Phokion G. Kolaitis", title = "Learning schema mappings", journal = j-TODS, volume = "38", number = "4", pages = "28:1--28:??", month = nov, year = "2013", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2539032.2539035", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Dec 9 11:35:10 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A schema mapping is a high-level specification of the relationship between a source schema and a target schema. Recently, a line of research has emerged that aims at deriving schema mappings automatically or semi-automatically with the help of data examples, that is, pairs consisting of a source instance and a target instance that depict, in some precise sense, the intended behavior of the schema mapping. Several different uses of data examples for deriving, refining, or illustrating a schema mapping have already been proposed and studied. In this article, we use the lens of computational learning theory to systematically investigate the problem of obtaining algorithmically a schema mapping from data examples. 
Our aim is to leverage the rich body of work on learning theory in order to develop a framework for exploring the power and the limitations of the various algorithmic methods for obtaining schema mappings from data examples. We focus on GAV schema mappings, that is, schema mappings specified by GAV (Global-As-View) constraints. GAV constraints are the most basic and the most widely supported language for specifying schema mappings. We present an efficient algorithm for learning GAV schema mappings using Angluin's model of exact learning with membership and equivalence queries. This is optimal, since we show that neither membership queries nor equivalence queries suffice, unless the source schema consists of unary relations only. We also obtain results concerning the learnability of schema mappings in the context of Valiant's well-known PAC (Probably-Approximately-Correct) learning model, and concerning the learnability of restricted classes of GAV schema mappings. Finally, as a byproduct of our work, we show that there is no efficient algorithm for approximating the shortest GAV schema mapping fitting a given set of examples, unless the source schema consists of unary relations only.", acknowledgement = ack-nhfb, articleno = "28", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kostylev:2014:CAS, author = "Egor V. Kostylev and Juan L. Reutter and Andr{\'a}s Z. 
Salamon", title = "Classification of annotation semirings over containment of conjunctive queries", journal = j-TODS, volume = "39", number = "1", pages = "1:1--1:??", month = jan, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2556524", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Feb 5 11:31:16 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study the problem of query containment of conjunctive queries over annotated databases. Annotations are typically attached to tuples and represent metadata, such as probability, multiplicity, comments, or provenance. It is usually assumed that annotations are drawn from a commutative semiring. Such databases pose new challenges in query optimization, since many related fundamental tasks, such as query containment, have to be reconsidered in the presence of propagation of annotations. We axiomatize several classes of semirings for each of which containment of conjunctive queries is equivalent to existence of a particular type of homomorphism. For each of these types, we also specify all semirings for which existence of a corresponding homomorphism is a sufficient (or necessary) condition for the containment. We develop new decision procedures for containment for some semirings which are not in any of these classes. This generalizes and systematizes previous approaches.", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Yi:2014:ISQ, author = "Ke Yi and Lu Wang and Zhewei Wei", title = "Indexing for summary queries: Theory and practice", journal = j-TODS, volume = "39", number = "1", pages = "2:1--2:??", month = jan, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2508702", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Feb 5 11:31:16 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Database queries can be broadly classified into two categories: reporting queries and aggregation queries. The former retrieves a collection of records from the database that match the query's conditions, while the latter returns an aggregate, such as count, sum, average, or max (min), of a particular attribute of these records. Aggregation queries are especially useful in business intelligence and data analysis applications where users are interested not in the actual records, but some statistics of them. They can also be executed much more efficiently than reporting queries, by embedding properly precomputed aggregates into an index. However, reporting and aggregation queries provide only two extremes for exploring the data. Data analysts often need more insight into the data distribution than what those simple aggregates provide, and yet certainly do not want the sheer volume of data returned by reporting queries. In this article, we design indexing techniques that allow for extracting a statistical summary of all the records in the query. The summaries we support include frequent items, quantiles, and various sketches, all of which are of central importance in massive data analysis. Our indexes require linear space and extract a summary with the optimal or near-optimal query cost. 
We illustrate the efficiency and usefulness of our designs through extensive experiments and a system demonstration.", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kifer:2014:PFM, author = "Daniel Kifer and Ashwin Machanavajjhala", title = "{Pufferfish}: a framework for mathematical privacy definitions", journal = j-TODS, volume = "39", number = "1", pages = "3:1--3:??", month = jan, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2514689", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Feb 5 11:31:16 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In this article, we introduce a new and general privacy framework called Pufferfish. The Pufferfish framework can be used to create new privacy definitions that are customized to the needs of a given application. The goal of Pufferfish is to allow experts in an application domain, who frequently do not have expertise in privacy, to develop rigorous privacy definitions for their data sharing needs. In addition to this, the Pufferfish framework can also be used to study existing privacy definitions. 
We illustrate the benefits with several applications of this privacy framework: we use it to analyze differential privacy and formalize a connection to attackers who believe that the data records are independent; we use it to create a privacy definition called hedging privacy, which can be used to rule out attackers whose prior beliefs are inconsistent with the data; we use the framework to define and study the notion of composition in a broader context than before; we show how to apply the framework to protect unbounded continuous attributes and aggregate information; and we show how to use the framework to rigorously account for prior data releases.", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ma:2014:SSC, author = "Shuai Ma and Yang Cao and Wenfei Fan and Jinpeng Huai and Tianyu Wo", title = "Strong simulation: Capturing topology in graph pattern matching", journal = j-TODS, volume = "39", number = "1", pages = "4:1--4:??", month = jan, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2528937", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Feb 5 11:31:16 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Graph pattern matching is finding all matches in a data graph for a given pattern graph and is often defined in terms of subgraph isomorphism, an NP-complete problem. To lower its complexity, various extensions of graph simulation have been considered instead. These extensions allow graph pattern matching to be conducted in cubic time. However, they fall short of capturing the topology of data graphs, that is, graphs may have a structure drastically different from pattern graphs they match, and the matches found are often too large to understand and analyze. 
To rectify these problems, this article proposes a notion of strong simulation, a revision of graph simulation for graph pattern matching. (1) We identify a set of criteria for preserving the topology of graphs matched. We show that strong simulation preserves the topology of data graphs and finds a bounded number of matches. (2) We show that strong simulation retains the same complexity as earlier extensions of graph simulation by providing a cubic-time algorithm for computing strong simulation. (3) We present the locality property of strong simulation which allows us to develop an effective distributed algorithm to conduct graph pattern matching on distributed graphs. (4) We experimentally verify the effectiveness and efficiency of these algorithms using both real-life and synthetic data.", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Gatterbauer:2014:OBP, author = "Wolfgang Gatterbauer and Dan Suciu", title = "Oblivious bounds on the probability of {Boolean} functions", journal = j-TODS, volume = "39", number = "1", pages = "5:1--5:??", month = jan, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2532641", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Feb 5 11:31:16 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article develops upper and lower bounds for the probability of Boolean functions by treating multiple occurrences of variables as independent and assigning them new individual probabilities. We call this approach dissociation and give an exact characterization of optimal oblivious bounds, that is, when the new probabilities are chosen independently of the probabilities of all other variables. 
Our motivation comes from the weighted model counting problem (or, equivalently, the problem of computing the probability of a Boolean function), which is \#P-hard in general. By performing several dissociations, one can transform a Boolean formula whose probability is difficult to compute into one whose probability is easy to compute, and which is guaranteed to provide an upper or lower bound on the probability of the original formula by choosing appropriate probabilities for the dissociated variables. Our new bounds shed light on the connection between previous relaxation-based and model-based approximations and unify them as concrete choices in a larger design space. We also show how our theory allows a standard relational database management system (DBMS) to both upper and lower bound hard probabilistic queries in guaranteed polynomial time.", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fang:2014:MOP, author = "Qiong Fang and Wilfred Ng and Jianlin Feng and Yuliang Li", title = "Mining order-preserving submatrices from probabilistic matrices", journal = j-TODS, volume = "39", number = "1", pages = "6:1--6:??", month = jan, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2533712", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Feb 5 11:31:16 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Order-preserving submatrices (OPSMs) capture consensus trends over columns shared by rows in a data matrix. Mining OPSM patterns discovers important and interesting local correlations in many real applications, such as those involving biological data or sensor data. 
The prevalence of uncertain data in various applications, however, poses new challenges for OPSM mining, since data uncertainty must be incorporated into OPSM modeling and the algorithmic aspects. In this article, we define new probabilistic matrix representations to model uncertain data with continuous distributions. A novel probabilistic order-preserving submatrix (POPSM) model is formalized in order to capture similar local correlations in probabilistic matrices. The POPSM model adopts a new probabilistic support measure that evaluates the extent to which a row belongs to a POPSM pattern. Due to the intrinsic high computational complexity of the POPSM mining problem, we utilize the anti-monotonic property of the probabilistic support measure and propose an efficient Apriori-based mining framework called ProbApri to mine POPSM patterns. The framework consists of two mining methods, UniApri and NormApri, which are developed for mining POPSM patterns, respectively, from two representative types of probabilistic matrices, the UniDist matrix (assuming uniform data distributions) and the NormDist matrix (assuming normal data distributions). We show that the NormApri method is practical enough for mining POPSM patterns from probabilistic matrices that model more general data distributions. We demonstrate the superiority of our approach by two applications. First, we use two biological datasets to illustrate that the POPSM model better captures the characteristics of the expression levels of biologically correlated genes and greatly promotes the discovery of patterns with high biological significance. Our result is significantly better than the counterpart OPSMRM (OPSM with repeated measurement) model which adopts a set-valued matrix representation to capture data uncertainty. 
Second, we run the experiments on an RFID trace dataset and show that our POPSM model is effective and efficient in capturing the common visiting subroutes among users.", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Wang:2014:ESS, author = "Jiannan Wang and Guoliang Li and Jianhua Feng", title = "Extending string similarity join to tolerant fuzzy token matching", journal = j-TODS, volume = "39", number = "1", pages = "7:1--7:??", month = jan, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2535628", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Feb 5 11:31:16 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "String similarity join that finds similar string pairs between two string sets is an essential operation in many applications and has attracted significant attention recently in the database community. A significant challenge in similarity join is to implement an effective fuzzy match operation to find all similar string pairs which may not match exactly. In this article, we propose a new similarity function, called fuzzy-token-matching-based similarity which extends token-based similarity functions (e.g., jaccard similarity and cosine similarity) by allowing fuzzy match between two tokens. We study the problem of similarity join using this new similarity function and present a signature-based method to address this problem. We propose new signature schemes and develop effective pruning techniques to improve the performance. We also extend our techniques to support weighted tokens. 
Experimental results show that our method achieves high efficiency and result quality and significantly outperforms state-of-the-art approaches.", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Sen:2014:DRM, author = "Siddhartha Sen and Robert E. Tarjan", title = "Deletion without rebalancing in multiway search trees", journal = j-TODS, volume = "39", number = "1", pages = "8:1--8:??", month = jan, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2540068", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Feb 5 11:31:16 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Some database systems that use a form of B-tree for the underlying data structure do not do rebalancing on deletion. This means that a bad sequence of deletions can create a very unbalanced tree. Yet such databases perform well in practice. Avoidance of rebalancing on deletion has been justified empirically and by average-case analysis, but to our knowledge, no worst-case analysis has been done. We do such an analysis. We show that the tree height remains logarithmic in the number of insertions, independent of the number of deletions. Furthermore, the amortized time for an insertion or deletion, excluding the search time, is $ O(1) $, and nodes are modified by insertions and deletions with a frequency that is exponentially small in their height. The latter results do not hold for standard B-trees. By adding periodic rebuilding of the tree, we obtain a data structure that is theoretically superior to standard B-trees in many ways. Our results suggest that rebalancing on deletion not only is unnecessary but may be harmful.", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Nekrich:2014:ERS, author = "Yakov Nekrich", title = "Efficient range searching for categorical and plain data", journal = j-TODS, volume = "39", number = "1", pages = "9:1--9:??", month = jan, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2543924", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Feb 5 11:31:16 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In the orthogonal range-searching problem, we store a set of input points $S$ in a data structure; the answer to a query $Q$ is a piece of information about points in $ Q \cap S $, for example, the list of all points in $ Q \cap S $ or the number of points in $Q$. In the colored (or categorical) range-searching problem, the set of input points is partitioned into categories; the answer to a query is a piece of information about categories of points in a query range. In this article, we describe several new results for one- and two-dimensional range-searching problems. We obtain an optimal adaptive data structure for counting the number of objects in a three-sided range and for counting categories of objects in a one-dimensional range. We also obtain new results on color range reporting in two dimensions, approximate color counting in one dimension, and some other related problems.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Xu:2014:OCP, author = "Pan Xu and Srikanta Tirthapura", title = "Optimality of Clustering Properties of Space-Filling Curves", journal = j-TODS, volume = "39", number = "2", pages = "10:1--10:??", month = may, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2556686", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed May 21 18:53:24 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Space-filling curves have been used in the design of data structures for multidimensional data for many decades. A fundamental quality metric of a space-filling curve is its ``clustering number'' with respect to a class of queries, which is the average number of contiguous segments on the space-filling curve that a query region can be partitioned into. We present a characterization of the clustering number of a general class of space-filling curves, as well as the first nontrivial lower bounds on the clustering number for any space-filling curve. Our results answer questions that have been open for more than 15 years.", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Thomson:2014:FDT, author = "Alexander Thomson and Thaddeus Diamond and Shu-Chun Weng and Kun Ren and Philip Shao and Daniel J. 
Abadi", title = "Fast Distributed Transactions and Strongly Consistent Replication for {OLTP} Database Systems", journal = j-TODS, volume = "39", number = "2", pages = "11:1--11:??", month = may, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2556685", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed May 21 18:53:24 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "As more data management software is designed for deployment in public and private clouds, or on a cluster of commodity servers, new distributed storage systems increasingly achieve high data access throughput via partitioning and replication. In order to achieve high scalability, however, today's systems generally reduce transactional support, disallowing single transactions from spanning multiple partitions. This article describes Calvin, a practical transaction scheduling and data replication layer that uses a deterministic ordering guarantee to significantly reduce the normally prohibitive contention costs associated with distributed transactions. This allows near-linear scalability on a cluster of commodity machines, without eliminating traditional transactional guarantees, introducing a single point of failure, or requiring application developers to reason about data partitioning. By replicating transaction inputs instead of transactional actions, Calvin is able to support multiple consistency levels --- including Paxos-based strong consistency across geographically distant replicas --- at no cost to transactional throughput. 
Furthermore, Calvin introduces a set of tools that will allow application developers to gain the full performance benefit of Calvin's server-side transaction scheduling mechanisms without introducing the additional code complexity and inconvenience normally associated with using DBMS stored procedures in place of ad hoc client-side transactions.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Nykiel:2014:SAM, author = "Tomasz Nykiel and Michalis Potamias and Chaitanya Mishra and George Kollios and Nick Koudas", title = "Sharing across Multiple {MapReduce} Jobs", journal = j-TODS, volume = "39", number = "2", pages = "12:1--12:??", month = may, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2560796", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed May 21 18:53:24 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Large-scale data analysis lies in the core of modern enterprises and scientific research. With the emergence of cloud computing, the use of an analytical query processing infrastructure can be directly associated with monetary cost. MapReduce has been a popular framework in the context of cloud computing, designed to serve long-running queries (jobs) which can be processed in batch mode. Taking into account that different jobs often perform similar work, there are many opportunities for sharing. In principle, sharing similar work reduces the overall amount of work, which can lead to reducing monetary charges for utilizing the processing infrastructure. In this article we present a sharing framework tailored to MapReduce, namely, {\tt MRShare}. 
Our framework, {\tt MRShare}, transforms a batch of queries into a new batch that will be executed more efficiently, by merging jobs into groups and evaluating each group as a single query. Based on our cost model for MapReduce, we define an optimization problem and we provide a solution that derives the optimal grouping of queries. Given the query grouping, we merge jobs appropriately and submit them to MapReduce for processing. A key property of {\tt MRShare} is that it is independent of the MapReduce implementation. Experiments with our prototype, built on top of Hadoop, demonstrate the overall effectiveness of our approach. {\tt MRShare} is primarily designed for handling I/O-intensive queries. However, with the development of high-level languages operating on top of MapReduce, user queries executed in this model become more complex and CPU intensive. Commonly, executed queries can be modeled as evaluating pipelines of CPU-expensive filters over the input stream. Examples of such filters include, but are not limited to, index probes, or certain types of joins. In this article we adapt some of the standard techniques for filter ordering used in relational and stream databases, propose their extensions, and implement them through {\tt MRAdaptiveFilter}, an extension of {\tt MRShare} for expensive filter ordering tailored to MapReduce, which allows one to handle both single- and batch-query execution modes. We present an experimental evaluation that demonstrates additional benefits of {\tt MRAdaptiveFilter}, when executing CPU-intensive queries in {\tt MRShare}.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Lu:2014:EAC, author = "Ying Lu and Jiaheng Lu and Gao Cong and Wei Wu and Cyrus Shahabi", title = "Efficient Algorithms and Cost Models for Reverse Spatial-Keyword $k$-Nearest Neighbor Search", journal = j-TODS, volume = "39", number = "2", pages = "13:1--13:??", month = may, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2576232", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed May 21 18:53:24 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Geographic objects associated with descriptive texts are becoming prevalent, justifying the need for spatial-keyword queries that consider both locations and textual descriptions of the objects. Specifically, the relevance of an object to a query is measured by spatial-textual similarity that is based on both spatial proximity and textual similarity. In this article, we introduce the Reverse Spatial-Keyword $k$-Nearest Neighbor (RSK $k$ NN) query, which finds those objects that have the query as one of their $k$ nearest spatial-textual objects. The RSK $k$ NN queries have numerous applications in online maps and GIS decision support systems. To answer RSK $k$ NN queries efficiently, we propose a hybrid index tree, called IUR-tree (Intersection-Union R-tree) that effectively combines location proximity with textual similarity. Subsequently, we design a branch-and-bound search algorithm based on the IUR-tree. To accelerate the query processing, we improve IUR-tree by leveraging the distribution of textual description, leading to some variants of the IUR-tree called Clustered IUR-tree (CIUR-tree) and combined clustered IUR-tree (C$^2$ IUR-tree), for each of which we develop optimized algorithms. 
We also provide a theoretical cost model to analyze the efficiency of our algorithms. Our empirical studies show that the proposed algorithms are efficient and scalable.", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bartolini:2014:DPW, author = "Ilaria Bartolini and Paolo Ciaccia and Marco Patella", title = "Domination in the Probabilistic World: Computing Skylines for Arbitrary Correlations and Ranking Semantics", journal = j-TODS, volume = "39", number = "2", pages = "14:1--14:??", month = may, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2602135", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed May 21 18:53:24 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In a probabilistic database, deciding if a tuple $u$ is better than another tuple $v$ has not a univocal solution, rather it depends on the specific Probabilistic Ranking Semantics (PRS) one wants to adopt so as to combine together tuples' scores and probabilities. In deterministic databases it is known that skyline queries are a remarkable alternative to (top-$k$) ranking queries, because they remove from the user the burden of specifying a scoring function that combines values of different attributes into a single score. The skyline of a deterministic relation $R$ is the set of undominated tuples in $R$ --- tuple $u$ dominates tuple $v$ iff on all the attributes of interest $u$ is better than or equal to $v$ and strictly better on at least one attribute. Domination is equivalent to having $ s(u) \geq s(v)$ for all monotone scoring functions $ s()$. 
The skyline of a probabilistic relation $ R^p$ can be similarly defined as the set of $P$-undominated tuples in $ R^p$, where now $u$ $P$-dominates $v$ iff, whatever monotone scoring function one would use to combine the skyline attributes, $u$ is reputed better than $v$ by the PRS at hand. This definition, which is applicable to arbitrary ranking semantics and probabilistic correlation models, is parametric in the adopted PRS, thus it ensures that ranking and skyline queries will always return consistent results. In this article we provide an overall view of the problem of computing the skyline of a probabilistic relation. We show how, under mild conditions that indeed hold for all known PRSs, checking $P$-domination can be cast into an optimization problem, whose complexity we characterize for a variety of combinations of ranking semantics and correlation models. For each analyzed case we also provide specific $P$-domination rules, which are exploited by the algorithm we detail for the case where the probabilistic model is known to the query processor. We also consider the case in which the probability of tuple events can only be obtained through an oracle, and describe another skyline algorithm for this loosely integrated scenario. Our experimental evaluation of $P$-domination rules and skyline algorithms confirms the theoretical analysis.", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Deng:2014:CQR, author = "Ting Deng and Wenfei Fan", title = "On the Complexity of Query Result Diversification", journal = j-TODS, volume = "39", number = "2", pages = "15:1--15:??", month = may, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2602136", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed May 21 18:53:24 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Query result diversification is a bi-criteria optimization problem for ranking query results. Given a database $D$, a query $Q$, and a positive integer $k$, it is to find a set of $k$ tuples from $ Q(D)$ such that the tuples are as relevant as possible to the query, and at the same time, as diverse as possible to each other. Subsets of $ Q(D)$ are ranked by an objective function defined in terms of relevance and diversity. Query result diversification has found a variety of applications in databases, information retrieval, and operations research. This article investigates the complexity of result diversification for relational queries. (1) We identify three problems in connection with query result diversification, to determine whether there exists a set of $k$ tuples that is ranked above a bound with respect to relevance and diversity, to assess the rank of a given $k$-element set, and to count how many $k$-element sets are ranked above a given bound based on an objective function. (2) We study these problems for a variety of query languages and for the three objective functions proposed in Gollapudi and Sharma [2009]. We establish the upper and lower bounds of these problems, all matching, for both combined complexity and data complexity. 
(3) We also investigate several special settings of these problems, identifying tractable cases. Moreover, (4) we reinvestigate these problems in the presence of compatibility constraints commonly found in practice, and provide their complexity in all these settings.", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Giatrakos:2014:DGQ, author = "Nikos Giatrakos and Antonios Deligiannakis and Minos Garofalakis and Izchak Sharfman and Assaf Schuster", title = "Distributed Geometric Query Monitoring Using Prediction Models", journal = j-TODS, volume = "39", number = "2", pages = "16:1--16:??", month = may, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2602137", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed May 21 18:53:24 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Many modern streaming applications, such as online analysis of financial, network, sensor, and other forms of data, are inherently distributed in nature. An important query type that is the focal point in such application scenarios regards actuation queries, where proper action is dictated based on a trigger condition placed upon the current value that a monitored function receives. Recent work [Sharfman et al. 2006, 2007b, 2008] studies the problem of (nonlinear) sophisticated function tracking in a distributive manner. The main concept behind the geometric monitoring approach proposed there is for each distributed site to perform the function monitoring over an appropriate subset of the input domain. 
In the current work, we examine whether the distributed monitoring mechanism can become more efficient, in terms of the number of communicated messages, by extending the geometric monitoring framework to utilize prediction models. We initially describe a number of local estimators (predictors) that are useful for the applications that we consider and which have already been shown particularly useful in past work. We then demonstrate the feasibility of incorporating predictors in the geometric monitoring framework and show that prediction-based geometric monitoring in fact generalizes the original geometric monitoring framework. We propose a large variety of different prediction-based monitoring models for the distributed threshold monitoring of complex functions. Our extensive experimentation with a variety of real datasets, functions, and parameter settings indicates that our approaches can provide significant communication savings ranging between two times and up to three orders of magnitude, compared to the transmission cost of the original monitoring framework.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Lei:2014:RDQ, author = "Chuan Lei and Elke A. Rundensteiner", title = "Robust Distributed Query Processing for Streaming Data", journal = j-TODS, volume = "39", number = "2", pages = "17:1--17:??", month = may, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2602138", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed May 21 18:53:24 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Distributed stream processing systems must function efficiently for data streams that fluctuate in their arrival rates and data distributions. 
Yet repeated and prohibitively expensive load reallocation across machines may make these systems ineffective, potentially resulting in data loss or even system failure. To overcome this problem, we propose a comprehensive solution, called the Robust Load Distribution (RLD) strategy, that is resilient under data fluctuations. RLD provides $ \epsilon $-optimal query performance under an expected range of load fluctuations without suffering from the performance penalty caused by load migration. RLD is based on three key strategies. First, we model robust distributed stream processing as a parametric query optimization problem in a parameter space that captures the stream fluctuations. The notions of both robust logical and robust physical plans that work together to proactively handle all ranges of expected fluctuations in parameters are abstracted as overlays of this parameter space. Second, our Early-terminated Robust Partitioning (ERP) finds a combination of robust logical plans that together cover the parameter space, while minimizing the number of prohibitively expensive optimizer calls with a probabilistic bound on the space coverage. Third, we design a family of algorithms for physical plan generation. Our GreedyPhy exploits a probabilistic model to efficiently find a robust physical plan that sustains most frequently used robust logical plans at runtime. Our CorPhy algorithm exploits operator correlations for the robust physical plan optimization. The resulting physical plan smooths the workload on each node under all expected fluctuations. Our OptPrune algorithm, using CorPhy as baseline, is guaranteed to find the optimal physical plan that maximizes the parameter space coverage with a practical increase in optimization time. 
Lastly, we further expand the capabilities of our proposed RLD framework to also appropriately react under so-called ``space drifts'', that is, a space drift is a change of the parameter space where the observed runtime statistics deviate from the expected optimization-time statistics. Our RLD solution is capable of adjusting itself to the unexpected yet significant data fluctuations beyond those planned for via covering the parameter space. Our experimental study using stock market and sensor network streams demonstrates that our RLD methodology consistently outperforms state-of-the-art solutions in terms of efficiency and effectiveness in highly fluctuating data stream environments.", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jensen:2014:E, author = "Christian S. Jensen", title = "Editorial", journal = j-TODS, volume = "39", number = "3", pages = "18:1--18:??", month = sep, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2662448", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 7 18:54:33 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Zhang:2014:TPI, author = "Rui Zhang and Jianzhong Qi and Martin Stradling and Jin Huang", title = "Towards a Painless Index for Spatial Objects", journal = j-TODS, volume = "39", number = "3", pages = "19:1--19:??", month = sep, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2629333", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 7 18:54:33 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Conventional spatial indexes, represented by the R-tree, employ multidimensional tree structures that are complicated and require enormous efforts to implement in a full-fledged database management system (DBMS). An alternative approach for supporting spatial queries is mapping-based indexing, which maps both data and queries into a one-dimensional space such that data can be indexed and queries can be processed through a one-dimensional indexing structure such as the B$^+$. Mapping-based indexing requires implementing only a few mapping functions, incurring much less effort in implementation compared to conventional spatial index structures. Yet, a major concern about using mapping-based indexes is their lower efficiency than conventional tree structures. In this article, we propose a mapping-based spatial indexing scheme called Size Separation Indexing (SSI). SSI is equipped with a suite of techniques including size separation, data distribution transformation, and more efficient mapping algorithms. These techniques overcome the drawbacks of existing mapping-based indexes and significantly improve the efficiency of query processing. 
We show through extensive experiments that, for window queries on spatial objects with nonzero extents, SSI has two orders of magnitude better performance than existing mapping-based indexes and competitive performance to the R-tree as a standalone implementation. We have also implemented SSI on top of two off-the-shelf DBMSs, PostgreSQL and a commercial platform, both having R-tree implementation. In this case, SSI is up to two orders of magnitude faster than their provided spatial indexes. Therefore, we achieve a spatial index more efficient than the R-tree in a DBMS implementation that is at the same time easy to implement. This result may upset a common perception that has existed for a long time in this area that the R-tree is the best choice for indexing spatial objects.", acknowledgement = ack-nhfb, articleno = "19", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Tatti:2014:FRI, author = "Nikolaj Tatti and Fabian Moerchen and Toon Calders", title = "Finding Robust Itemsets under Subsampling", journal = j-TODS, volume = "39", number = "3", pages = "20:1--20:??", month = sep, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2656261", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 7 18:54:33 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Mining frequent patterns is plagued by the problem of pattern explosion, making pattern reduction techniques a key challenge in pattern mining. In this article we propose a novel theoretical framework for pattern reduction by measuring the robustness of a property of an itemset such as closedness or nonderivability. The robustness of a property is the probability that this property holds on random subsets of the original data. 
We study four properties, namely an itemset being closed, free, non-derivable, or totally shattered, and demonstrate how to compute the robustness analytically without actually sampling the data. Our concept of robustness has many advantages: Unlike statistical approaches for reducing patterns, we do not assume a null hypothesis or any noise model and, in contrast to noise-tolerant or approximate patterns, the robust patterns for a given property are always a subset of the patterns with this property. If the underlying property is monotonic then the measure is also monotonic, allowing us to efficiently mine robust itemsets. We further derive a parameter-free technique for ranking itemsets that can be used for top-$k$ approaches. Our experiments demonstrate that we can successfully use the robustness measure to reduce the number of patterns and that ranking yields interesting itemsets.", acknowledgement = ack-nhfb, articleno = "20", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Choi:2014:MRS, author = "Dong-Wan Choi and Chin-Wan Chung and Yufei Tao", title = "Maximizing Range Sum in External Memory", journal = j-TODS, volume = "39", number = "3", pages = "21:1--21:??", month = sep, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2629477", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 7 18:54:33 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article studies the MaxRS problem in spatial databases. Given a set $O$ of weighted points and a rectangle $r$ of a given size, the goal of the MaxRS problem is to find a location of $r$ such that the sum of the weights of all the points covered by $r$ is maximized. 
This problem is useful in many location-based services such as finding the best place for a new franchise store with a limited delivery range and finding the hotspot with the largest number of nearby attractions for a tourist with a limited reachable range. However, the problem has been studied mainly in the theoretical perspective, particularly in computational geometry. The existing algorithms from the computational geometry community are in-memory algorithms that do not guarantee the scalability. In this article, we propose a scalable external-memory algorithm ( ExactMaxRS ) for the MaxRS problem that is optimal in terms of the I/O complexity. In addition, we propose an approximation algorithm ( ApproxMaxCRS ) for the MaxCRS problem that is a circle version of the MaxRS problem. We prove the correctness and optimality of the ExactMaxRS algorithm along with the approximation bound of the ApproxMaxCRS algorithm. Furthermore, motivated by the fact that all the existing solutions simply assume that there is no tied area for the best location, we extend the MaxRS problem to a more fundamental problem, namely AllMaxRS, so that all the locations with the same best score can be retrieved. We first prove that the AllMaxRS problem cannot be trivially solved by applying the techniques for the MaxRS problem. Then we propose an output-sensitive external-memory algorithm ( TwoPhaseMaxRS ) that gives the exact solution for the AllMaxRS problem through two phases. Also, we prove both the soundness and completeness of the result returned from TwoPhaseMaxRS. 
From extensive experimental results, we show that ExactMaxRS and ApproxMaxCRS are several orders of magnitude faster than methods adapted from existing algorithms, the approximation bound in practice is much better than the theoretical bound of ApproxMaxCRS, and TwoPhaseMaxRS is not only much faster but also more robust than the straightforward extension of ExactMaxRS.", acknowledgement = ack-nhfb, articleno = "21", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Karwa:2014:PAG, author = "Vishesh Karwa and Sofya Raskhodnikova and Adam Smith and Grigory Yaroslavtsev", title = "Private Analysis of Graph Structure", journal = j-TODS, volume = "39", number = "3", pages = "22:1--22:??", month = sep, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2611523", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 7 18:54:33 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We present efficient algorithms for releasing useful statistics about graph data while providing rigorous privacy guarantees. Our algorithms work on datasets that consist of relationships between individuals, such as social ties or email communication. The algorithms satisfy edge differential privacy, which essentially requires that the presence or absence of any particular relationship be hidden. Our algorithms output approximate answers to subgraph counting queries. Given a query graph $H$, for example, a triangle, $k$-star, or $k$-triangle, the goal is to return the number of edge-induced isomorphic copies of $H$ in the input graph. The special case of triangles was considered by Nissim et al. [2007] and a more general investigation of arbitrary query graphs was initiated by Rastogi et al. [2009]. We extend the approach of Nissim et al. 
to a new class of statistics, namely $k$-star queries. We also give algorithms for $k$-triangle queries using a different approach based on the higher-order local sensitivity. For the specific graph statistics we consider (i.e., $k$-stars and $k$-triangles), we significantly improve on the work of Rastogi et al.: our algorithms satisfy a stronger notion of privacy that does not rely on the adversary having a particular prior distribution on the data, and add less noise to the answers before releasing them. We evaluate the accuracy of our algorithms both theoretically and empirically, using a variety of real and synthetic datasets. We give explicit, simple conditions under which these algorithms add a small amount of noise. We also provide the average-case analysis in the Erd{\H{o}}s--R{\'e}nyi--Gilbert $ G(n, p)$ random graph model. Finally, we give hardness results indicating that the approach Nissim et al. used for triangles cannot easily be extended to $k$-triangles (hence justifying our development of a new algorithmic approach).", acknowledgement = ack-nhfb, articleno = "22", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Pang:2014:PPA, author = "Hweehwa Pang and Xuhua Ding", title = "Privacy-Preserving Ad-Hoc Equi-Join on Outsourced Data", journal = j-TODS, volume = "39", number = "3", pages = "23:1--23:??", month = sep, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2629501", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 7 18:54:33 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In IT outsourcing, a user may delegate the data storage and query processing functions to a third-party server that is not completely trusted. 
This gives rise to the need to safeguard the privacy of the database as well as the user queries over it. In this article, we address the problem of running ad hoc equi-join queries directly on encrypted data in such a setting. Our contribution is the first solution that achieves constant complexity per pair of records that are evaluated for the join. After formalizing the privacy requirements pertaining to the database and user queries, we introduce a cryptographic construct for securely joining records across relations. The construct protects the database with a strong encryption scheme. Moreover, information disclosure after executing an equi-join is kept to the minimum --- that two input records combine to form an output record if and only if they share common join attribute values. There is no disclosure on records that are not part of the join result. Building on this construct, we then present join algorithms that optimize the join execution by eliminating the need to match every record pair from the input relations. We provide a detailed analysis of the cost of the algorithms and confirm the analysis through extensive experiments with both synthetic and benchmark workloads. Through this evaluation, we tease out useful insights on how to configure the join algorithms to deliver acceptable execution time in practice.", acknowledgement = ack-nhfb, articleno = "23", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Malvestuto:2014:JLO, author = "Francesco M. 
Malvestuto", title = "A Join-Like Operator to Combine Data Cubes and Answer Queries from Multiple Data Cubes", journal = j-TODS, volume = "39", number = "3", pages = "24:1--24:??", month = sep, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2638545", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 7 18:54:33 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In order to answer a ``joint'' query from multiple data cubes, Pourabass and Shoshani [2007] distinguish the data cube on the measure of interest (called the ``primary'' data cube) from the other data cubes (called ``proxy'' data cubes) that are used to involve the dimensions (in the query) not in the primary data cube. They demonstrate in study cases that, if the measures of the primary and proxy data cubes are correlated, then the answer to a joint query is an accurate estimate of its true value. Needless to say, for two or more proxy data cubes, the result depends upon the way the primary and proxy data cubes are combined together; however, for certain combination schemes Pourabass and Shoshani provide a sufficient condition, that they call proxy noncommonality, for the invariance of the result. In this article, we introduce: (1) a merge operator combining the contents of a primary data cube with the contents of a proxy data cube, (2) merge expressions for general combination schemes, and (3) an equivalence relation between merge expressions having the same pattern. Then, we prove that proxy noncommonality characterizes patterns for which every two merge expressions are equivalent. Moreover, we provide an efficient procedure for answering joint queries in the special case of perfect merge expressions. 
Finally, we show that our results apply to data cubes in which measures are obtained from unaggregated data using the aggregate functions SUM, COUNT, MAX, and MIN, and a lot more.", acknowledgement = ack-nhfb, articleno = "24", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Gottlob:2014:QRO, author = "Georg Gottlob and Giorgio Orsi and Andreas Pieris", title = "Query Rewriting and Optimization for Ontological Databases", journal = j-TODS, volume = "39", number = "3", pages = "25:1--25:??", month = sep, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2638546", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 7 18:54:33 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Ontological queries are evaluated against a knowledge base consisting of an extensional database and an ontology (i.e., a set of logical assertions and constraints that derive new intensional knowledge from the extensional database), rather than directly on the extensional database. The evaluation and optimization of such queries is an intriguing new problem for database research. In this article, we discuss two important aspects of this problem: query rewriting and query optimization. Query rewriting consists of the compilation of an ontological query into an equivalent first-order query against the underlying extensional database. We present a novel query rewriting algorithm for rather general types of ontological constraints that is well suited for practical implementations. 
In particular, we show how a conjunctive query against a knowledge base, expressed using linear and sticky existential rules, that is, members of the recently introduced Datalog{$^\pm$} family of ontology languages, can be compiled into a union of conjunctive queries (UCQ) against the underlying database. Ontological query optimization, in this context, attempts to improve this rewriting process so as to produce possibly small and cost-effective UCQ rewritings for an input query.", acknowledgement = ack-nhfb, articleno = "25", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jensen:2014:FIA, author = "Christian S. Jensen", title = "Foreword to Invited Articles Issue", journal = j-TODS, volume = "39", number = "4", pages = "26:1--26:??", month = dec, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2697050", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jan 7 15:35:46 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "26", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Hu:2014:EAT, author = "Xiaocheng Hu and Yufei Tao and Chin-Wan Chung", title = "{I/O}-Efficient Algorithms on Triangle Listing and Counting", journal = j-TODS, volume = "39", number = "4", pages = "27:1--27:??", month = dec, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2691190.2691193", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jan 7 15:35:46 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article studies I/O-efficient algorithms for the triangle listing problem and the triangle counting problem, whose solutions are basic operators in dealing with many other graph problems. In the former problem, given an undirected graph G, the objective is to find all the cliques involving 3 vertices in G. In the latter problem, the objective is to report just the number of such cliques without having to enumerate them. Both problems have been well studied in internal memory, but still remain as difficult challenges when G does not fit in memory, thus making it crucial to minimize the number of disk I/Os performed. Although previous research has attempted to tackle these challenges, the state-of-the-art solutions rely on a set of crippling assumptions to guarantee good performance. Motivated by this, we develop a new algorithm that is provably I/O and CPU efficient at the same time, without making any assumption on the input G at all. The algorithm uses ideas drastically different from all the previous approaches, and outperforms the existing competitors by a factor of over an order of magnitude in our extensive experimentation.", acknowledgement = ack-nhfb, articleno = "27", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Arenas:2014:DXK, author = "Marcelo Arenas and Jonny Daenen and Frank Neven and Martin Ugarte and Jan {Van Den Bussche} and Stijn Vansummeren", title = "Discovering {XSD} Keys from {XML} Data", journal = j-TODS, volume = "39", number = "4", pages = "28:1--28:??", month = dec, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2638547", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jan 7 15:35:46 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A great deal of research into the learning of schemas from XML data has been conducted in recent years to enable the automatic discovery of XML schemas from XML documents when no schema or only a low-quality one is available. Unfortunately, and in strong contrast to, for instance, the relational model, the automatic discovery of even the simplest of XML constraints, namely XML keys, has been left largely unexplored in this context. A major obstacle here is the unavailability of a theory on reasoning about XML keys in the presence of XML schemas, which is needed to validate the quality of candidate keys. The present article embarks on a fundamental study of such a theory and classifies the complexity of several crucial properties concerning XML keys in the presence of an XSD, like, for instance, testing for consistency, boundedness, satisfiability, universality, and equivalence. Of independent interest, novel results are obtained related to cardinality estimation of XPath result sets. A mining algorithm is then developed within the framework of levelwise search. 
The algorithm leverages known discovery algorithms for functional dependencies in the relational model, but incorporates the properties mentioned before to assess and refine the quality of derived keys. An experimental study on an extensive body of real-world XML data evaluating the effectiveness of the proposed algorithm is provided.", acknowledgement = ack-nhfb, articleno = "28", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jung:2014:SLM, author = "Hyungsoo Jung and Hyuck Han and Alan Fekete and Gernot Heiser and Heon Y. Yeom", title = "A Scalable Lock Manager for Multicores", journal = j-TODS, volume = "39", number = "4", pages = "29:1--29:??", month = dec, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2691190.2691192", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jan 7 15:35:46 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Modern implementations of DBMS software are intended to take advantage of high core counts that are becoming common in high-end servers. However, we have observed that several database platforms, including MySQL, Shore-MT, and a commercial system, exhibit throughput collapse as load increases into oversaturation (where there are more request threads than cores), even for a workload with little or no logical contention for locks, such as a read-only workload. Our analysis of MySQL identifies latch contention within the lock manager as the bottleneck responsible for this collapse. We design a lock manager with reduced latching, implement it in MySQL, and show that it avoids the collapse and generally improves performance. Our efficient implementation of a lock manager is enabled by a staged allocation and deallocation of locks. 
Locks are preallocated in bulk, so that the lock manager only has to perform simple list manipulation operations during the acquire and release phases of a transaction. Deallocation of the lock data structures is also performed in bulk, which enables the use of fast implementations of lock acquisition and release as well as concurrent deadlock checking.", acknowledgement = ack-nhfb, articleno = "29", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Papadopoulos:2014:LQA, author = "Stavros Papadopoulos and Graham Cormode and Antonios Deligiannakis and Minos Garofalakis", title = "Lightweight Query Authentication on Streams", journal = j-TODS, volume = "39", number = "4", pages = "30:1--30:??", month = dec, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2656336", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jan 7 15:35:46 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We consider a stream outsourcing setting, where a data owner delegates the management of a set of disjoint data streams to an untrusted server. The owner authenticates his streams via signatures. The server processes continuous queries on the union of the streams for clients trusted by the owner. Along with the results, the server sends proofs of result correctness derived from the owner's signatures, which are verifiable by the clients. We design novel constructions for a collection of fundamental problems over streams represented as linear algebraic queries. In particular, our basic schemes authenticate dynamic vector sums, matrix products, and dot products. 
These techniques can be adapted for authenticating a wide range of important operations in streaming environments, including group-by queries, joins, in-network aggregation, similarity matching, and event processing. We also present extensions to address the case of sliding window queries, and when multiple clients are interested in different subsets of the data. These methods take advantage of a novel nonce chaining technique that we introduce, which is used to reduce the verification cost without affecting any other costs. All our schemes are lightweight and offer strong cryptographic guarantees derived from formal definitions and proofs. We experimentally confirm the practicality of our schemes in the performance-sensitive streaming setting.", acknowledgement = ack-nhfb, articleno = "30", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Gheerbrant:2014:NEQ, author = "Am{\'e}lie Gheerbrant and Leonid Libkin and Cristina Sirangelo", title = "Na{\"\i}ve Evaluation of Queries over Incomplete Databases", journal = j-TODS, volume = "39", number = "4", pages = "31:1--31:??", month = dec, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2691190.2691194", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jan 7 15:35:46 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The term na{\"\i}ve evaluation refers to evaluating queries over incomplete databases as if nulls were usual data values, that is, to using the standard database query evaluation engine. Since the semantics of query answering over incomplete databases is that of certain answers, we would like to know when na{\"\i}ve evaluation computes them, that is, when certain answers can be found without inventing new specialized algorithms. 
For relational databases it is well known that unions of conjunctive queries possess this desirable property, and results on preservation of formulae under homomorphisms tell us that, within relational calculus, this class cannot be extended under the open-world assumption. Our goal here is twofold. First, we develop a general framework that allows us to determine, for a given semantics of incompleteness, classes of queries for which na{\"\i}ve evaluation computes certain answers. Second, we apply this approach to a variety of semantics, showing that for many classes of queries beyond unions of conjunctive queries, na{\"\i}ve evaluation makes perfect sense under assumptions different from open world. Our key observations are: (1) na{\"\i}ve evaluation is equivalent to monotonicity of queries with respect to a semantics-induced ordering, and (2) for most reasonable semantics of incompleteness, such monotonicity is captured by preservation under various types of homomorphisms. Using these results we find classes of queries for which na{\"\i}ve evaluation works, for example, positive first-order formulae for the closed-world semantics. Even more, we introduce a general relation-based framework for defining semantics of incompleteness, show how it can be used to capture many known semantics and to introduce new ones, and describe classes of first-order queries for which na{\"\i}ve evaluation works under such semantics.", acknowledgement = ack-nhfb, articleno = "31", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kimelfeld:2014:CMM, author = "Benny Kimelfeld and Phokion G. 
Kolaitis", title = "The Complexity of Mining Maximal Frequent Subgraphs", journal = j-TODS, volume = "39", number = "4", pages = "32:1--32:??", month = dec, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2629550", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jan 7 15:35:46 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A frequent subgraph of a given collection of graphs is a graph that is isomorphic to a subgraph of at least as many graphs in the collection as a given threshold. Frequent subgraphs generalize frequent itemsets and arise in various contexts, from bioinformatics to the Web. Since the space of frequent subgraphs is typically extremely large, research in graph mining has focused on special types of frequent subgraphs that can be orders of magnitude smaller in number, yet encapsulate the space of all frequent subgraphs. Maximal frequent subgraphs (i.e., the ones not properly contained in any frequent subgraph) constitute the most useful such type. In this article, we embark on a comprehensive investigation of the computational complexity of mining maximal frequent subgraphs. Our study is carried out by considering the effect of three different parameters: possible restrictions on the class of graphs; a fixed bound on the threshold; and a fixed bound on the number of desired answers. We focus on specific classes of connected graphs: general graphs, planar graphs, graphs of bounded degree, and graphs of bounded treewidth (trees being a special case). Moreover, each class has two variants: that in which the nodes are unlabeled, and that in which they are uniquely labeled. We delineate the complexity of the enumeration problem for each of these variants by determining when it is solvable in (total or incremental) polynomial time and when it is NP-hard. 
Specifically, for the labeled classes, we show that bounding the threshold yields tractability but, in most cases, bounding the number of answers does not, unless P=NP; an exception is the case of labeled trees, where bounding either of these two parameters yields tractability. The state of affairs turns out to be quite different for the unlabeled classes. The main (and most challenging to prove) result concerns unlabeled trees: we show NP-hardness, even if the input consists of two trees and both the threshold and the number of desired answers are equal to just two. In other words, we establish that the following problem is NP-complete: given two unlabeled trees, do they have more than one maximal subtree in common?", acknowledgement = ack-nhfb, articleno = "32", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bienvenu:2014:OBD, author = "Meghyn Bienvenu and Balder {Ten Cate} and Carsten Lutz and Frank Wolter", title = "Ontology-Based Data Access: a Study through Disjunctive {Datalog}, {CSP}, and {MMSNP}", journal = j-TODS, volume = "39", number = "4", pages = "33:1--33:??", month = dec, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2661643", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jan 7 15:35:46 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Ontology-based data access is concerned with querying incomplete data sources in the presence of domain-specific knowledge provided by an ontology. A central notion in this setting is that of an ontology-mediated query, which is a database query coupled with an ontology. 
In this article, we study several classes of ontology-mediated queries, where the database queries are given as some form of conjunctive query and the ontologies are formulated in description logics or other relevant fragments of first-order logic, such as the guarded fragment and the unary negation fragment. The contributions of the article are threefold. First, we show that popular ontology-mediated query languages have the same expressive power as natural fragments of disjunctive datalog, and we study the relative succinctness of ontology-mediated queries and disjunctive datalog queries. Second, we establish intimate connections between ontology-mediated queries and constraint satisfaction problems (CSPs) and their logical generalization, MMSNP formulas. Third, we exploit these connections to obtain new results regarding: (i) first-order rewritability and datalog rewritability of ontology-mediated queries; (ii) P/NP dichotomies for ontology-mediated queries; and (iii) the query containment problem for ontology-mediated queries.", acknowledgement = ack-nhfb, articleno = "33", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Li:2014:TPP, author = "Chao Li and Daniel Yang Li and Gerome Miklau and Dan Suciu", title = "A Theory of Pricing Private Data", journal = j-TODS, volume = "39", number = "4", pages = "34:1--34:??", month = dec, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2691190.2691191", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jan 7 15:35:46 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Personal data has value to both its owner and to institutions who would like to analyze it. Privacy mechanisms protect the owner's data while releasing to analysts noisy versions of aggregate query results. 
But such strict protections of the individual's data have not yet found wide use in practice. Instead, Internet companies, for example, commonly provide free services in return for valuable sensitive information from users, which they exploit and sometimes sell to third parties. As awareness of the value of personal data increases, so has the drive to compensate the end-user for her private information. The idea of monetizing private data can improve over the narrower view of hiding private data, since it empowers individuals to control their data through financial means. In this article we propose a theoretical framework for assigning prices to noisy query answers as a function of their accuracy, and for dividing the price amongst data owners who deserve compensation for their loss of privacy. Our framework adopts and extends key principles from both differential privacy and query pricing in data markets. We identify essential properties of the pricing function and micropayments, and characterize valid solutions.", acknowledgement = ack-nhfb, articleno = "34", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Davidson:2014:TCN, author = "Susan Davidson and Sanjeev Khanna and Tova Milo and Sudeepa Roy", title = "Top-$k$ and Clustering with Noisy Comparisons", journal = j-TODS, volume = "39", number = "4", pages = "35:1--35:??", month = dec, year = "2014", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2684066", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Jan 7 15:35:46 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study the problems of max/top-$k$ and clustering when the comparison operations may be performed by oracles whose answer may be erroneous. 
Comparisons may either be of type or of value: given two data elements, the answer to a type comparison is ``yes'' if the elements have the same type and therefore belong to the same group (cluster); the answer to a value comparison orders the two data elements. We give efficient algorithms that are guaranteed to achieve correct results with high probability, analyze the cost of these algorithms in terms of the total number of comparisons (i.e., using a fixed-cost model), and show that they are essentially the best possible. We also show that fewer comparisons are needed when values and types are correlated, or when the error model is one in which the error decreases as the distance between the two elements in the sorted order increases. Finally, we examine another important class of cost functions, concave functions, which balances the number of rounds of interaction with the oracle with the number of questions asked of the oracle. Results of this article form an important first step in providing a formal basis for max/top-$k$ and clustering queries in crowdsourcing applications, that is, when the oracle is implemented using the crowd. We explain what simplifying assumptions are made in the analysis, what results carry to a generalized crowdsourcing setting, and what extensions are required to support a full-fledged model.", acknowledgement = ack-nhfb, articleno = "35", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jensen:2015:EUE, author = "Christian S. 
Jensen", title = "Editorial: Updates to the Editorial Board", journal = j-TODS, volume = "40", number = "1", pages = "1:1--1:??", month = mar, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2747020", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 26 05:54:21 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "1e", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Pripuzic:2015:TSE, author = "Kresimir Pripuzi{\'c} and Ivana Podnar Zarko and Karl Aberer", title = "Time- and Space-Efficient Sliding Window Top-$k$ Query Processing", journal = j-TODS, volume = "40", number = "1", pages = "1:1--1:??", month = mar, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2736701", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 26 05:54:21 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A sliding window top-k ( top-k/w ) query monitors incoming data stream objects within a sliding window of size w to identify the k highest-ranked objects with respect to a given scoring function over time. Processing of such queries is challenging because, even when an object is not a top-k/w object at the time when it enters the processing system, it might become one in the future. Thus a set of potential top-k/w objects has to be stored in memory while its size should be minimized to efficiently cope with high data streaming rates. Existing approaches typically store top-k/w and candidate sliding window objects in a k-skyband over a two-dimensional score-time space. However, due to continuous changes of the k-skyband, its maintenance is quite costly. 
Probabilistic k-skyband is a novel data structure storing data stream objects from a sliding window with significant probability to become top-k/w objects in future. Continuous probabilistic k-skyband maintenance offers considerably improved runtime performance compared to k-skyband maintenance, especially for large values of k, at the expense of a small and controllable error rate. We propose two possible probabilistic k-skyband usages: ( i ) When it is used to process all sliding window objects, the resulting top-k/w algorithm is approximate and adequate for processing random-order data streams. ( ii ) When probabilistic k-skyband is used to process only a subset of most recent sliding window objects, it can improve the runtime performance of continuous k-skyband maintenance, resulting in a novel exact top-k/w algorithm. Our experimental evaluation systematically compares different top-k/w processing algorithms and shows that while competing algorithms offer either time efficiency at the expense of space efficiency or vice-versa, our algorithms based on the probabilistic k-skyband are both time and space efficient.", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Olteanu:2015:SBF, author = "Dan Olteanu and Jakub Z{\'a}vodn{\'y}", title = "Size Bounds for Factorised Representations of Query Results", journal = j-TODS, volume = "40", number = "1", pages = "2:1--2:??", month = mar, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2656335", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 26 05:54:21 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study two succinct representation systems for relational data based on relational algebra expressions with unions, Cartesian products, and singleton relations: f-representations, which employ algebraic factorisation using distributivity of product over union, and d-representations, which are f-representations where further succinctness is brought by explicit sharing of repeated subexpressions. In particular we study such representations for results of conjunctive queries. We derive tight asymptotic bounds for representation sizes and present algorithms to compute representations within these bounds. We compare the succinctness of f-representations and d-representations for results of equi-join queries, and relate them to fractional edge covers and fractional hypertree decompositions of the query hypergraph. Recent work showed that f-representations can significantly boost the performance of query evaluation in centralised and distributed settings and of machine learning tasks.", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Pawlik:2015:ECT, author = "Mateusz Pawlik and Nikolaus Augsten", title = "Efficient Computation of the Tree Edit Distance", journal = j-TODS, volume = "40", number = "1", pages = "3:1--3:??", month = mar, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2699485", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 26 05:54:21 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We consider the classical tree edit distance between ordered labelled trees, which is defined as the minimum-cost sequence of node edit operations that transform one tree into another. The state-of-the-art solutions for the tree edit distance are not satisfactory. The main competitors in the field either have optimal worst-case complexity but the worst case happens frequently, or they are very efficient for some tree shapes but degenerate for others. This leads to unpredictable and often infeasible runtimes. There is no obvious way to choose between the algorithms. In this article we present RTED, a robust tree edit distance algorithm. The asymptotic complexity of our algorithm is smaller than or equal to the complexity of the best competitors for any input instance, that is, our algorithm is both efficient and worst-case optimal. This is achieved by computing a dynamic decomposition strategy that depends on the input trees. RTED is shown optimal among all algorithms that use LRH ( left-right-heavy ) strategies, which include RTED and the fastest tree edit distance algorithms presented in literature. In our experiments on synthetic and real-world data we empirically evaluate our solution and compare it to the state-of-the-art.", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Drosou:2015:MRD, author = "Marina Drosou and Evaggelia Pitoura", title = "Multiple Radii {DisC} Diversity: Result Diversification Based on Dissimilarity and Coverage", journal = j-TODS, volume = "40", number = "1", pages = "4:1--4:??", month = mar, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2699499", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 26 05:54:21 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Recently, result diversification has attracted a lot of attention as a means to improve the quality of results retrieved by user queries. In this article, we introduce a novel definition of diversity called DisC diversity. Given a tuning parameter $r$, which we call radius, we consider two items to be similar if their distance is smaller than or equal to $r$. A DisC diverse subset of a result contains items such that each item in the result is represented by a similar item in the diverse subset and the items in the diverse subset are dissimilar to each other. We show that locating a minimum DisC diverse subset is an NP-hard problem and provide algorithms for its approximation. We extend our definition to the multiple radii case, where each item is associated with a different radius based on its importance, relevance, or other factors. We also propose adapting DisC diverse subsets to a different degree of diversification by adjusting $r$, that is, increasing the radius (or zooming-out) and decreasing the radius (or zooming-in). We present efficient implementations of our algorithms based on the $M$-tree, a spatial index structure, and experimentally evaluate their performance.", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Benedikt:2015:ASA, author = "Michael Benedikt and Pierre Bourhis and Clemens Ley", title = "Analysis of Schemas with Access Restrictions", journal = j-TODS, volume = "40", number = "1", pages = "5:1--5:??", month = mar, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2699500", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 26 05:54:21 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study verification of systems whose transitions consist of accesses to a Web-based data source. An access is a lookup on a relation within a relational database, fixing values for a set of positions in the relation. For example, a transition can represent access to a Web form, where the user is restricted to filling in values for a particular set of fields. We look at verifying properties of a schema describing the possible accesses of such a system. We present a language where one can describe the properties of an access path and also specify additional restrictions on accesses that are enforced by the schema. Our main property language, AccessLTL, is based on a first-order extension of linear-time temporal logic, interpreting access paths as sequences of relational structures. We also present a lower-level automaton model, A-automata, into which AccessLTL specifications can compile. We show that AccessLTL and A-automata can express static analysis problems related to ``querying with limited access patterns'' that have been studied in the database literature in the past, such as whether an access is relevant to answering a query and whether two queries are equivalent in the accessible data they can return. 
We prove decidability and complexity results for several restrictions and variants of AccessLTL and explain which properties of paths can be expressed in each restriction.", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Athanassoulis:2015:OUD, author = "Manos Athanassoulis and Shimin Chen and Anastasia Ailamaki and Philip B. Gibbons and Radu Stoica", title = "Online Updates on Data Warehouses via Judicious Use of Solid-State Storage", journal = j-TODS, volume = "40", number = "1", pages = "6:1--6:??", month = mar, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2699484", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 26 05:54:21 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Data warehouses have been traditionally optimized for read-only query performance, allowing only offline updates at night, essentially trading off data freshness for performance. The need for $ 24 \times 7 $ operations in global markets and the rise of online and other quickly reacting businesses make concurrent online updates increasingly desirable. Unfortunately, state-of-the-art approaches fall short of supporting fast analysis queries over fresh data. The conventional approach of performing updates in place can dramatically slow down query performance, while prior proposals using differential updates either require large in-memory buffers or may incur significant update migration cost. This article presents a novel approach for supporting online updates in data warehouses that overcomes the limitations of prior approaches by making judicious use of available SSDs to cache incoming updates. 
We model the problem of query processing with differential updates as a type of outer join between the data residing on disks and the updates residing on SSDs. We present MaSM algorithms for performing such joins and periodic migrations, with small memory footprints, low query overhead, low SSD writes, efficient in-place migration of updates, and correct ACID support. We present detailed modeling of the proposed approach, and provide proofs regarding the fundamental properties of the MaSM algorithms. Our experimentation shows that MaSM incurs only up to 7\% overhead both on synthetic range scans (varying range size from 4KB to 100GB) and in a TPC-H query replay study, while also increasing the update throughput by orders of magnitude.", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jensen:2015:EBT, author = "Christian S. Jensen", title = "Editorial: The Best of Two Worlds --- Present Your {TODS} Paper at {SIGMOD}", journal = j-TODS, volume = "40", number = "2", pages = "7:1--7:??", month = jun, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2770931", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 7 09:22:19 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Beedkar:2015:CGS, author = "Kaustubh Beedkar and Klaus Berberich and Rainer Gemulla and Iris Miliaraki", title = "Closing the Gap: Sequence Mining at Scale", journal = j-TODS, volume = "40", number = "2", pages = "8:1--8:??", month = jun, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2757217", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 7 09:22:19 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Frequent sequence mining is one of the fundamental building blocks in data mining. While the problem has been extensively studied, few of the available techniques are sufficiently scalable to handle datasets with billions of sequences; such large-scale datasets arise, for instance, in text mining and session analysis. In this article, we propose MG-FSM, a scalable algorithm for frequent sequence mining on MapReduce. MG-FSM can handle so-called ``gap constraints'', which can be used to limit the output to a controlled set of frequent sequences. Both positional and temporal gap constraints, as well as appropriate maximality and closedness constraints, are supported. At its heart, MG-FSM partitions the input database in a way that allows us to mine each partition independently using any existing frequent sequence mining algorithm. We introduce the notion of $ \omega $-equivalency, which is a generalization of the notion of a ``projected database'' used by many frequent pattern mining algorithms. We also present a number of optimization techniques that minimize partition size, and therefore computational and communication costs, while still maintaining correctness. 
Our experimental study in the contexts of text mining and session analysis suggests that MG-FSM is significantly more efficient and scalable than alternative approaches.", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Ameloot:2015:DDF, author = "Tom J. Ameloot", title = "Deciding Determinism with Fairness for Simple Transducer Networks", journal = j-TODS, volume = "40", number = "2", pages = "9:1--9:??", month = jun, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2757215", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 7 09:22:19 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A distributed database system often operates in an asynchronous communication model where messages can be arbitrarily delayed. This communication model causes nondeterministic effects like unpredictable arrival orders of messages. Nonetheless, in general we want the distributed system to be deterministic; the system should produce the same output despite the nondeterministic effects on messages. Previously, two interpretations of determinism have been proposed. The first says that all infinite fair computation traces produce the same output. The second interpretation is a confluence notion, saying that all finite computation traces can still be extended to produce the same output. A decidability result for the confluence notion was previously obtained for so-called simple transducer networks, a model from the field of declarative networking. In the current article, we also present a decidability result for simple transducer networks, but this time for the first interpretation of determinism, with infinite fair computation traces. 
We also compare the expressivity of simple transducer networks under both interpretations.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Nagendra:2015:EPS, author = "Mithila Nagendra and K. Sel{\c{c}}uk Candan", title = "Efficient Processing of Skyline-Join Queries over Multiple Data Sources", journal = j-TODS, volume = "40", number = "2", pages = "10:1--10:??", month = jun, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2699483", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 7 09:22:19 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Efficient processing of skyline queries has been an area of growing interest. Many of the earlier skyline techniques assumed that the skyline query is applied to a single data table. Naturally, these algorithms were not suitable for many applications in which the skyline query may involve attributes belonging to multiple data sources. In other words, if the data used in the skyline query are stored in multiple tables, then join operations would be required before the skyline can be searched. The task of computing skylines on multiple data sources has been coined as the skyline-join problem and various skyline-join algorithms have been proposed. However, the current proposals suffer several drawbacks: they often need to scan the input tables exhaustively in order to obtain the set of skyline-join results; moreover, the pruning techniques employed to eliminate the tuples are largely based on expensive pairwise tuple-to-tuple comparisons. 
In this article, we aim to address these shortcomings by proposing two novel skyline-join algorithms, namely skyline-sensitive join (S$^2$J) and symmetric skyline-sensitive join (S$^3$J), to process skyline queries over two data sources. Our approaches compute the results using a novel layer/region pruning technique (LR-pruning) that prunes the join space in blocks as opposed to individual data points, thereby avoiding excessive pairwise point-to-point dominance checks. Furthermore, the S$^3$J algorithm utilizes an early stopping condition in order to successfully compute the skyline results by accessing only a subset of the input tables. In addition to S$^2$J and S$^3$J, we also propose the S$^2$J-M and S$^3$J-M algorithms. These algorithms extend S$^2$J's and S$^3$J's two-way skyline-join ability to efficiently process skyline-join queries over more than two data sources. S$^2$J-M and S$^3$J-M leverage the extended concept of LR-pruning, called $M$-way LR-pruning, to compute multi-way skyline-joins in which more than two data sources are integrated during skyline processing. We report extensive experimental results that confirm the advantages of the proposed algorithms over state-of-the-art skyline-join techniques.
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Yuan:2015:OBL, author = "Ganzhao Yuan and Zhenjie Zhang and Marianne Winslett and Xiaokui Xiao and Yin Yang and Zhifeng Hao", title = "Optimizing Batch Linear Queries under Exact and Approximate Differential Privacy", journal = j-TODS, volume = "40", number = "2", pages = "11:1--11:??", month = jun, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2699501", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 7 09:22:19 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Differential privacy is a promising privacy-preserving paradigm for statistical query processing over sensitive data. It works by injecting random noise into each query result such that it is provably hard for the adversary to infer the presence or absence of any individual record from the published noisy results. The main objective in differentially private query processing is to maximize the accuracy of the query results while satisfying the privacy guarantees. Previous work, notably Li et al. [2010], has suggested that, with an appropriate strategy, processing a batch of correlated queries as a whole achieves considerably higher accuracy than answering them individually. However, to our knowledge there is currently no practical solution to find such a strategy for an arbitrary query batch; existing methods either return strategies of poor quality (often worse than naive methods) or require prohibitively expensive computations for even moderately large domains. Motivated by this, we propose a low-rank mechanism (LRM), the first practical differentially private technique for answering batch linear queries with high accuracy. 
LRM works for both exact (i.e., $ \epsilon $-) and approximate (i.e., ($ \epsilon $, $ \delta $)-) differential privacy definitions. We derive the utility guarantees of LRM and provide guidance on how to set the privacy parameters, given the user's utility expectation. Extensive experiments using real data demonstrate that our proposed method consistently outperforms state-of-the-art query processing solutions under differential privacy, by large margins.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Termehchy:2015:CEC, author = "Arash Termehchy and Ali Vakilian and Yodsawalai Chodpathumwan and Marianne Winslett", title = "Cost-Effective Conceptual Design for Information Extraction", journal = j-TODS, volume = "40", number = "2", pages = "12:1--12:??", month = jun, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2716321", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 7 09:22:19 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "It is well established that extracting and annotating occurrences of entities in a collection of unstructured text documents with their concepts improves the effectiveness of answering queries over the collection. However, it is very resource intensive to create and maintain large annotated collections. Since the available resources of an enterprise are limited and/or its users may have urgent information needs, it may have to select only a subset of relevant concepts for extraction and annotation. We call this subset a conceptual design for the annotated collection. 
In this article, we introduce and formally define the problem of cost-effective conceptual design where, given a collection, a set of relevant concepts, and a fixed budget, one likes to find a conceptual design that most improves the effectiveness of answering queries over the collection. We provide efficient algorithms for special cases of the problem and prove it is generally NP-hard in the number of relevant concepts. We propose three efficient approximations to solve the problem: a greedy algorithm, an approximate popularity maximization (APM for short), and approximate annotation-benefit maximization (AAM for short). We show that, if there are no constraints regrading the overlap of concepts, APM is a fully polynomial time approximation scheme. We also prove that if the relevant concepts are mutually exclusive, the greedy algorithm delivers a constant approximation ratio if the concepts are equally costly, APM has a constant approximation ratio, and AAM is a fully polynomial-time approximation scheme. Our empirical results using a Wikipedia collection and a search engine query log validate the proposed formalization of the problem and show that APM and AAM efficiently compute conceptual designs. They also indicate that, in general, APM delivers the optimal conceptual designs if the relevant concepts are not mutually exclusive. Also, if the relevant concepts are mutually exclusive, the conceptual designs delivered by AAM improve the effectiveness of answering queries over the collection more than the solutions provided by APM.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cao:2015:EPS, author = "Xin Cao and Gao Cong and Tao Guo and Christian S. 
Jensen and Beng Chin Ooi", title = "Efficient Processing of Spatial Group Keyword Queries", journal = j-TODS, volume = "40", number = "2", pages = "13:1--13:??", month = jun, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2772600", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 7 09:22:19 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "With the proliferation of geo-positioning and geo-tagging techniques, spatio-textual objects that possess both a geographical location and a textual description are gaining in prevalence, and spatial keyword queries that exploit both location and textual description are gaining in prominence. However, the queries studied so far generally focus on finding individual objects that each satisfy a query rather than finding groups of objects where the objects in a group together satisfy a query. We define the problem of retrieving a group of spatio-textual objects such that the group's keywords cover the query's keywords and such that the objects are nearest to the query location and have the smallest inter-object distances. Specifically, we study three instantiations of this problem, all of which are NP-hard. We devise exact solutions as well as approximate solutions with provable approximation bounds to the problems. In addition, we solve the problems of retrieving top- k groups of three instantiations, and study a weighted version of the problem that incorporates object weights. We present empirical studies that offer insight into the efficiency of the solutions, as well as the accuracy of the approximate solutions.", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Vincent:2015:TCD, author = "Millist Vincent and Jixue Liu and Hong-Cheu Liu and Sebastian Link", title = "Technical Correspondence: {``Differential Dependencies: Reasoning and Discovery''} Revisited", journal = j-TODS, volume = "40", number = "2", pages = "14:1--14:??", month = jun, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2757214", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 7 09:22:19 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "See \cite{Song:2011:DDR} and response \cite{Song:2017:RDD}.", abstract = "To address the frequently occurring situation where data is inexact or imprecise, a number of extensions to the classical notion of a functional dependency (FD) integrity constraint have been proposed in recent years. One of these extensions is the notion of a differential dependency (DD), introduced in the recent article ``Differential Dependencies: Reasoning and Discovery'' by Song and Chen in the March 2011 edition of this journal. A DD generalises the notion of an FD by requiring only that the values of the attribute from the RHS of the DD satisfy a distance constraint whenever the values of attributes from the LHS of the DD satisfy a distance constraint. In contrast, an FD requires that the values from the attributes in the RHS of an FD be equal whenever the values of the attributes from the LHS of the FD are equal. 
The article ``Differential Dependencies: Reasoning and Discovery'' investigated a number of aspects of DDs, the most important of which, since they form the basis for the other topics investigated, were the consistency problem (determining whether there exists a relation instance that satisfies a set of DDs) and the implication problem (determining whether a set of DDs logically implies another DD). Concerning these problems, a number of results were claimed in ``Differential Dependencies: Reasoning and Discovery''. In this article we conduct a detailed analysis of the correctness of these results. The outcomes of our analysis are that, for almost every claimed result, we show there are either fundamental errors in the proof or the result is false. For some of the claimed results we are able to provide corrected proofs, but for other results their correctness remains open.", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Lu:2015:BQA, author = "Jiaheng Lu and Chunbin Lin and Wei Wang and Chen Li and Xiaokui Xiao", title = "Boosting the Quality of Approximate String Matching by Synonyms", journal = j-TODS, volume = "40", number = "3", pages = "15:1--15:??", month = oct, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2818177", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Oct 24 11:43:27 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A string-similarity measure quantifies the similarity between two text strings for approximate string matching or comparison. For example, the strings ``Sam'' and ``Samuel'' can be considered to be similar. 
Most existing work that computes the similarity of two strings only considers syntactic similarities, for example, number of common words or $q$-grams. While this is indeed an indicator of similarity, there are many important cases where syntactically-different strings can represent the same real-world object. For example, ``Bill'' is a short form of ``William,'' and ``Database Management Systems'' can be abbreviated as ``DBMS.'' Given a collection of predefined synonyms, the purpose of this article is to explore such existing knowledge to effectively evaluate the similarity between two strings and efficiently perform similarity searches and joins, thereby boosting the quality of approximate string matching. In particular, we first present an expansion-based framework to measure string similarities efficiently while considering synonyms. We then study efficient algorithms for similarity searches and joins by proposing two novel indexes, called SI-trees and QP-trees, which combine signature-filtering and length-filtering strategies. In order to improve the efficiency of our algorithms, we develop an estimator to estimate the size of candidates to enable an online selection of signature filters. This estimator provides strong low-error, high-confidence guarantees while requiring only logarithmic space and time costs, thus making our method attractive both in theory and in practice. Finally, the experimental results from a comprehensive study of the algorithms with three real datasets verify the effectiveness and efficiency of our approaches.", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Rusu:2015:WDA, author = "Florin Rusu and Zixuan Zhuang and Mingxi Wu and Chris Jermaine", title = "Workload-Driven Antijoin Cardinality Estimation", journal = j-TODS, volume = "40", number = "3", pages = "16:1--16:??", month = oct, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2818178", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Oct 24 11:43:27 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Antijoin cardinality estimation is among a handful of problems that has eluded accurate efficient solutions amenable to implementation in relational query optimizers. Given the widespread use of antijoin and subset-based queries in analytical workloads and the extensive research targeted at join cardinality estimation---a seemingly related problem---the lack of adequate solutions for antijoin cardinality estimation is intriguing. In this article, we introduce a novel sampling-based estimator for antijoin cardinality that (unlike existent estimators) provides sufficient accuracy and efficiency to be implemented in a query optimizer. The proposed estimator incorporates three novel ideas. First, we use prior workload information when learning a mixture superpopulation model of the data offline. Second, we design a Bayesian statistics framework that updates the superpopulation model according to the live queries, thus allowing the estimator to adapt dynamically to the online workload. Third, we develop an efficient algorithm for sampling from a hypergeometric distribution in order to generate Monte Carlo trials, without explicitly instantiating either the population or the sample. 
When put together, these ideas form the basis of an efficient antijoin cardinality estimator satisfying the strict requirements of a query optimizer, as shown by the extensive experimental results over synthetically-generated as well as massive TPC-H data.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Chen:2015:OLQ, author = "Zitong Chen and Yubao Liu and Raymond Chi-Wing Wong and Jiamin Xiong and Ganglin Mai and Cheng Long", title = "Optimal Location Queries in Road Networks", journal = j-TODS, volume = "40", number = "3", pages = "17:1--17:??", month = oct, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2818179", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Oct 24 11:43:27 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In this article, we study an optimal location query based on a road network. Specifically, given a road network containing clients and servers, an optimal location query finds a location on the road network such that when a new server is set up at this location, a certain cost function computed based on the clients and servers (including the new server) is optimized. Two types of cost functions, namely, MinMax and MaxSum, have been used for this query. The optimal location query problem with MinMax as the cost function is called the MinMax query, which finds a location for setting up a new server such that the maximum cost of a client being served by his/her closest server is minimized. The optimal location query problem with MaxSum as the cost function is called the MaxSum query, which finds a location for setting up a new server such that the sum of the weights of clients attracted by the new server is maximized. 
The MinMax query and the MaxSum query correspond to two types of optimal location query with the objectives defined from the clients' perspective and from the new server's perspective, respectively. Unfortunately, the existing solutions for the optimal query problem are not efficient. In this article, we propose an efficient algorithm, namely, MinMax-Alg ( MaxSum-Alg ), for the MinMax (MaxSum) query, which is based on a novel idea of nearest location component. We also discuss two extensions of the optimal location query, namely, the optimal multiple-location query and the optimal location query on a 3D road network. Extensive experiments were conducted, showing that our algorithms are faster than the state of the art by at least an order of magnitude on large real benchmark datasets. For example, in our largest real datasets, the state of the art ran for more than 10 (12) hours while our algorithm ran within 3 (2) minutes only for the MinMax (MaxSum) query, that is, our algorithm ran at least 200 (600) times faster than the state of the art.", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Rietveld:2015:RLD, author = "Kristian F. D. Rietveld and Harry A. G. Wijshoff", title = "Reducing Layered Database Applications to their Essence through Vertical Integration", journal = j-TODS, volume = "40", number = "3", pages = "18:1--18:??", month = oct, year = "2015", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2818180", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Oct 24 11:43:27 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In the last decade, improvements on single-core performance of CPUs has stagnated. 
Consequently, methods for the development and optimization of software for these platforms have to be reconsidered. Software must be optimized such that the available single-core performance is exploited more effectively. This can be achieved by reducing the number of instructions that need to be executed. In this article, we show that layered database applications execute many redundant, nonessential, instructions that can be eliminated without affecting the course of execution and the output of the application. This elimination is performed using a vertical integration process which breaks down the different layers of layered database applications. By doing so, applications are being reduced to their essence, and as a consequence, transformations can be carried out that affect both the application code and the data access code which were not possible before. We show that this vertical integration process can be fully automated and, as such, be integrated in an operational workflow. Experimental evaluation of this approach shows that up to 95\% of the instructions can be eliminated. The reduction of instructions leads to a more efficient use of the available hardware resources. This results in greatly improved performance of the application and a significant reduction in energy consumption.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Cheng:2015:SDM,
  author =       "Yu Cheng and Florin Rusu",
  title =        "{SCANRAW}: a Database Meta-Operator for Parallel In-Situ Processing and Loading",
  journal =      j-TODS,
  volume =       "40",
  number =       "3",
  pages =        "19:1--19:??",
  month =        oct,
  year =         "2015",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2818181",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Sat Oct 24 11:43:27 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "Traditional databases incur a significant data-to-query delay due to the requirement to load data inside the system before querying. Since this is not acceptable in many domains generating massive amounts of raw data (e.g., genomics), databases are entirely discarded. External tables, on the other hand, provide instant SQL querying over raw files. Their performance across a query workload is limited though by the speed of repeated full scans, tokenizing, and parsing of the entire file. In this article, we propose SCANRAW, a novel database meta-operator for in-situ processing over raw files that integrates data loading and external tables seamlessly, while preserving their advantages: optimal performance across a query workload and zero time-to-query. We decompose loading and external table processing into atomic stages in order to identify common functionality. We analyze alternative implementations and discuss possible optimizations for each stage. Our major contribution is a parallel superscalar pipeline implementation that allows SCANRAW to take advantage of the current many- and multicore processors by overlapping the execution of independent stages. Moreover, SCANRAW overlaps query processing with loading by speculatively using the additional I/O bandwidth arising during the conversion process for storing data into the database, such that subsequent queries execute faster. As a result, SCANRAW makes intelligent use of the available system resources---CPU cycles and I/O bandwidth---by switching dynamically between tasks to ensure that optimal performance is achieved. We implement SCANRAW in a state-of-the-art database system and evaluate its performance across a variety of synthetic and real-world datasets. Our results show that SCANRAW with speculative loading achieves the best-possible performance for a query sequence at any point in the processing. Moreover, SCANRAW maximizes resource utilization for the entire workload execution while speculatively loading data and without interfering with normal query processing.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Parchas:2015:UGP,
  author =       "Panos Parchas and Francesco Gullo and Dimitris Papadias and Francesco Bonchi",
  title =        "Uncertain Graph Processing through Representative Instances",
  journal =      j-TODS,
  volume =       "40",
  number =       "3",
  pages =        "20:1--20:??",
  month =        oct,
  year =         "2015",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2818182",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Sat Oct 24 11:43:27 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "Data in several applications can be represented as an uncertain graph whose edges are labeled with a probability of existence. Exact query processing on uncertain graphs is prohibitive for most applications, as it involves evaluation over an exponential number of instantiations.
Thus, typical approaches employ Monte-Carlo sampling, which (i) draws a number of possible graphs (samples), (ii) evaluates the query on each of them, and (iii) aggregates the individual answers to generate the final result. However, this approach can also be extremely time consuming for large uncertain graphs commonly found in practice. To facilitate efficiency, we study the problem of extracting a single representative instance from an uncertain graph. Conventional processing techniques can then be applied on this representative to closely approximate the result on the original graph. In order to maintain data utility, the representative instance should preserve structural characteristics of the uncertain graph. We start with representatives that capture the expected vertex degrees, as this is a fundamental property of the graph topology. We then generalize the notion of vertex degree to the concept of n -clique cardinality, that is, the number of cliques of size n that contain a vertex. For the first problem, we propose two methods: Average Degree Rewiring (ADR), which is based on random edge rewiring, and Approximate B-Matching (ABM), which applies graph matching techniques. For the second problem, we develop a greedy approach and a game-theoretic framework. We experimentally demonstrate, with real uncertain graphs, that indeed the representative instances can be used to answer, efficiently and accurately, queries based on several metrics such as shortest path distance, clustering coefficient, and betweenness centrality.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Ameloot:2016:WFM,
  author =       "Tom J. Ameloot and Bas Ketsman and Frank Neven and Daniel Zinn",
  title =        "Weaker Forms of Monotonicity for Declarative Networking: a More Fine-Grained Answer to the {CALM}-Conjecture",
  journal =      j-TODS,
  volume =       "40",
  number =       "4",
  pages =        "21:1--21:??",
  month =        jan,
  year =         "2016",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2809784",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Thu Jan 21 12:35:53 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "The CALM-conjecture, first stated by Hellerstein [2010] and proved in its revised form by Ameloot et al. [2013] within the framework of relational transducer networks, asserts that a query has a coordination-free execution strategy if and only if the query is monotone. Zinn et al. [2012] extended the framework of relational transducer networks to allow for specific data distribution strategies and showed that the nonmonotone win-move query is coordination-free for domain-guided data distributions. In this article, we extend the story by equating increasingly larger classes of coordination-free computations with increasingly weaker forms of monotonicity and present explicit Datalog variants that capture each of these classes. One such fragment is based on stratified Datalog where rules are required to be connected with the exception of the last stratum. In addition, we characterize coordination-freeness as those computations that do not require knowledge about all other nodes in the network, and therefore, can not globally coordinate. The results in this article can be interpreted as a more fine-grained answer to the CALM-conjecture.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Bonifati:2016:LJQ,
  author =       "Angela Bonifati and Radu Ciucanu and S{\l}awek Staworko",
  title =        "Learning Join Queries from User Examples",
  journal =      j-TODS,
  volume =       "40",
  number =       "4",
  pages =        "24:1--24:??",
  month =        jan,
  year =         "2016",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2818637",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Thu Jan 21 12:35:53 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "We investigate the problem of learning join queries from user examples. The user is presented with a set of candidate tuples and is asked to label them as positive or negative examples, depending on whether or not she would like the tuples as part of the join result. The goal is to quickly infer an arbitrary n -ary join predicate across an arbitrary number m of relations while keeping the number of user interactions as minimal as possible. We assume no prior knowledge of the integrity constraints across the involved relations. Inferring the join predicate across multiple relations when the referential constraints are unknown may occur in several applications, such as data integration, reverse engineering of database queries, and schema inference. In such scenarios, the number of tuples involved in the join is typically large. We introduce a set of strategies that let us inspect the search space and aggressively prune what we call uninformative tuples, and we directly present to the user the informative ones---that is, those that allow the user to quickly find the goal query she has in mind. In this article, we focus on the inference of joins with equality predicates and also allow disjunctive join predicates and projection in the queries.
We precisely characterize the frontier between tractability and intractability for the following problems of interest in these settings: consistency checking, learnability, and deciding the informativeness of a tuple. Next, we propose several strategies for presenting tuples to the user in a given order that allows minimization of the number of interactions. We show the efficiency of our approach through an experimental study on both benchmark and synthetic datasets.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Yang:2016:NFI,
  author =       "Xiaochun Yang and Tao Qiu and Bin Wang and Baihua Zheng and Yaoshu Wang and Chen Li",
  title =        "Negative Factor: Improving Regular-Expression Matching in Strings",
  journal =      j-TODS,
  volume =       "40",
  number =       "4",
  pages =        "25:1--25:46",
  month =        jan,
  year =         "2016",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2847525",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Thu Jan 21 12:35:53 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "The problem of finding matches of a regular expression (RE) on a string exists in many applications, such as text editing, biosequence search, and shell commands. Existing techniques first identify candidates using substrings in the RE, then verify each of them using an automaton. These techniques become inefficient when there are many candidate occurrences that need to be verified. In this article, we propose a novel technique that prunes false negatives by utilizing negative factors, which are substrings that cannot appear in an answer. A main advantage of the technique is that it can be integrated with many existing algorithms to improve their efficiency significantly. We present a detailed description of this technique. We develop an efficient algorithm that utilizes negative factors to prune candidates, then improve it by using bit operations to process negative factors in parallel. We show that negative factors, when used with necessary factors (substrings that must appear in each answer), can achieve much better pruning power. We analyze the large number of negative factors, and develop an algorithm for finding a small number of high-quality negative factors. We conducted a thorough experimental study of this technique on real datasets, including DNA sequences, proteins, and text documents, and show significant performance improvement of the state-of-the-art tools by an order of magnitude.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Zhou:2016:FRW,
  author =       "Zhuojie Zhou and Nan Zhang and Zhiguo Gong and Gautam Das",
  title =        "Faster Random Walks by Rewiring Online Social Networks On-the-Fly",
  journal =      j-TODS,
  volume =       "40",
  number =       "4",
  pages =        "26:1--26:??",
  month =        feb,
  year =         "2016",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2847526",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Mon Jun 20 11:19:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "Many online social networks feature restrictive web interfaces that only allow the query of a user's local neighborhood. To enable analytics over such an online social network through its web interface, many recent efforts use Markov Chain Monte Carlo (MCMC) methods such as random walks to sample users in the social network and thereby support analytics based on the samples. The problem with such an approach, however, is the large amount of queries often required for a random walk to converge to a desired (stationary) sampling distribution. In this article, we consider a novel problem of enabling a faster random walk over online social networks by ``rewiring'' the social network on-the-fly. Specifically, we develop a Modified TOpology Sampling (MTO-Sampling) scheme that, by using only information exposed by the restrictive web interface, constructs a ``virtual'' random-walk-friendly overlay topology of the social network while performing a random walk and ensures that the random walk follows the modified overlay topology rather than the original one. We describe in this article instantiations of MTO-Sampling for various types of random walks, such as Simple Random Walk (MTO-SRW), Metropolis--Hastings Random Walk (MTO-MHRW), and General Random Walk (MTO-GRW). We not only rigidly prove that MTO-Sampling improves the efficiency of sampling, but we also demonstrate the significance of such improvement through experiments on real-world online social networks such as Google Plus, Epinion, Facebook, etc.",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Jensen:2016:EUE,
  author =       "Christian S.
Jensen",
  title =        "Editorial: Updates to the Editorial Board",
  journal =      j-TODS,
  volume =       "41",
  number =       "1",
  pages =        "1e:1--1e:??",
  month =        apr,
  year =         "2016",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2893581",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Mon Jun 20 11:19:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1e",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Libkin:2016:STV,
  author =       "Leonid Libkin",
  title =        "{SQL}'s Three-Valued Logic and Certain Answers",
  journal =      j-TODS,
  volume =       "41",
  number =       "1",
  pages =        "1:1--1:??",
  month =        apr,
  year =         "2016",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2877206",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Mon Jun 20 11:19:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "The goal of the article is to bridge the difference between theoretical and practical approaches to answering queries over databases with nulls. Theoretical research has long ago identified the notion of correctness of query answering over incomplete data: one needs to find certain answers, which are true regardless of how incomplete information is interpreted. This serves as the notion of correctness of query answering, but carries a huge complexity tag. In practice, on the other hand, query answering must be very efficient, and to achieve this, SQL uses three-valued logic for evaluating queries on databases with nulls. Due to the complexity mismatch, the two approaches cannot coincide, but perhaps they are related in some way. For instance, does SQL always produce answers we can be certain about? This is not so: SQL's and certain answers semantics could be totally unrelated. We show, however, that a slight modification of the three-valued semantics for relational calculus queries can provide the required certainty guarantees. The key point of the new scheme is to fully utilize the three-valued semantics, and classify answers not into certain or noncertain, as was done before, but rather into certainly true, certainly false, or unknown. This yields relatively small changes to the evaluation procedure, which we consider at the level of both declarative (relational calculus) and procedural (relational algebra) queries. These new evaluation procedures give us certainty guarantees even for queries returning tuples with null values.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Zhang:2016:MOF,
  author =       "Ce Zhang and Arun Kumar and Christopher R{\'e}",
  title =        "Materialization Optimizations for Feature Selection Workloads",
  journal =      j-TODS,
  volume =       "41",
  number =       "1",
  pages =        "2:1--2:??",
  month =        apr,
  year =         "2016",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2877204",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Mon Jun 20 11:19:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "There is an arms race in the data management industry to support statistical analytics. Feature selection, the process of selecting a feature set that will be used to build a statistical model, is widely regarded as the most critical step of statistical analytics. Thus, we argue that managing the feature selection process is a pressing data management challenge. We study this challenge by describing a feature selection language and a supporting prototype system that builds on top of current industrial R-integration layers. From our interactions with analysts, we learned that feature selection is an interactive human-in-the-loop process, which means that feature selection workloads are rife with reuse opportunities. Thus, we study how to materialize portions of this computation using not only classical database materialization optimizations but also methods that have not previously been used in database optimization, including structural decomposition methods (like QR factorization) and warmstart. These new methods have no analogue in traditional SQL systems, but they may be interesting for array and scientific database applications. On a diverse set of datasets and programs, we find that traditional database-style approaches that ignore these new opportunities are more than two orders of magnitude slower than an optimal plan in this new trade-off space across multiple R backends. Furthermore, we show that it is possible to build a simple cost-based optimizer to automatically select a near-optimal execution plan for feature selection.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Olteanu:2016:EFP,
  author =       "Dan Olteanu and Sebastiaan J.
{Van Schaik}",
  title =        "{ENFrame}: a Framework for Processing Probabilistic Data",
  journal =      j-TODS,
  volume =       "41",
  number =       "1",
  pages =        "3:1--3:??",
  month =        apr,
  year =         "2016",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2877205",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Mon Jun 20 11:19:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "This article introduces ENFrame, a framework for processing probabilistic data. Using ENFrame, users can write programs in a fragment of Python with constructs such as loops, list comprehension, aggregate operations on lists, and calls to external database engines. Programs are then interpreted probabilistically by ENFrame. We exemplify ENFrame on three clustering algorithms ( k -means, k -medoids, and Markov clustering) and one classification algorithm ( k -nearest-neighbour). A key component of ENFrame is an event language to succinctly encode correlations, trace the computation of user programs, and allow for computation of discrete probability distributions for program variables. We propose a family of sequential and concurrent, exact, and approximate algorithms for computing the probability of interconnected events. Experiments with k -medoids clustering and k -nearest-neighbour show orders-of-magnitude improvements of exact processing using ENFrame over na{\"\i}ve processing in each possible world, of approximate over exact, and of concurrent over sequential processing.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Fink:2016:DQN,
  author =       "Robert Fink and Dan Olteanu",
  title =        "Dichotomies for Queries with Negation in Probabilistic Databases",
  journal =      j-TODS,
  volume =       "41",
  number =       "1",
  pages =        "4:1--4:??",
  month =        apr,
  year =         "2016",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2877203",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Mon Jun 20 11:19:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "This article charts the tractability frontier of two classes of relational algebra queries in tuple-independent probabilistic databases. The first class consists of queries with join, projection, selection, and negation but without repeating relation symbols and union. The second class consists of quantified queries that express the following binary relationships among sets of entities: set division, set inclusion, set equivalence, and set incomparability. Quantified queries are expressible in relational algebra using join, projection, nested negation, and repeating relation symbols. Each query in the two classes has either polynomial-time or \#P-hard data complexity and the tractable queries can be recognised efficiently. Our result for the first query class extends a known dichotomy for conjunctive queries without self-joins to such queries with negation. For quantified queries, their tractability is sensitive to their outermost projection operator: They are tractable if no attribute representing set identifiers is projected away and \#P-hard otherwise.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Zhou:2016:BEQ,
  author =       "Xiaoling Zhou and Jianbin Qin and Chuan Xiao and Wei Wang and Xuemin Lin and Yoshiharu Ishikawa",
  title =        "{BEVA}: an Efficient Query Processing Algorithm for Error-Tolerant Autocompletion",
  journal =      j-TODS,
  volume =       "41",
  number =       "1",
  pages =        "5:1--5:??",
  month =        apr,
  year =         "2016",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2877201",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Mon Jun 20 11:19:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "Query autocompletion has become a standard feature in many search applications, especially for search engines. A recent trend is to support the error-tolerant autocompletion, which increases the usability significantly by matching prefixes of database strings and allowing a small number of errors. In this article, we systematically study the query processing problem for error-tolerant autocompletion with a given edit distance threshold. We propose a general framework that encompasses existing methods and characterizes different classes of algorithms and the minimum amount of information they need to maintain under different constraints. We then propose a novel evaluation strategy that achieves the minimum active node size by eliminating ancestor-descendant relationships among active nodes entirely. In addition, we characterize the essence of edit distance computation by a novel data structure named edit vector automaton (EVA). It enables us to compute new active nodes and their associated states efficiently by table lookups.
In order to support large distance thresholds, we devise a partitioning scheme to reduce the size and construction cost of the automaton, which results in the universal partitioned EVA (UPEVA) to handle arbitrarily large thresholds. Our extensive evaluation demonstrates that our proposed method outperforms existing approaches in both space and time efficiencies.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Fagin:2016:DCI,
  author =       "Ronald Fagin and Benny Kimelfeld and Frederick Reiss and Stijn Vansummeren",
  title =        "Declarative Cleaning of Inconsistencies in Information Extraction",
  journal =      j-TODS,
  volume =       "41",
  number =       "1",
  pages =        "6:1--6:??",
  month =        apr,
  year =         "2016",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2877202",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Mon Jun 20 11:19:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "The population of a predefined relational schema from textual content, commonly known as Information Extraction (IE), is a pervasive task in contemporary computational challenges associated with Big Data. Since the textual content varies widely in nature and structure (from machine logs to informal natural language), it is notoriously difficult to write IE programs that unambiguously extract the sought information. For example, during extraction, an IE program could annotate a substring as both an address and a person name. When this happens, the extracted information is said to be inconsistent, and some way of removing inconsistencies is crucial to compute the final output. Industrial-strength IE systems like GATE and IBM SystemT therefore provide a built-in collection of cleaning operations to remove inconsistencies from extracted relations. These operations, however, are collected in an ad hoc fashion through use cases. Ideally, we would like to allow IE developers to declare their own policies. But existing cleaning operations are defined in an algorithmic way, and hence it is not clear how to extend the built-in operations without requiring low-level coding of internal or external functions. We embark on the establishment of a framework for declarative cleaning of inconsistencies in IE through principles of database theory. Specifically, building upon the formalism of document spanners for IE, we adopt the concept of prioritized repairs, which has been recently proposed as an extension of the traditional database repairs to incorporate priorities among conflicting facts. We show that our framework captures the popular cleaning policies, as well as the POSIX semantics for extraction through regular expressions. We explore the problem of determining whether a cleaning declaration is unambiguous (i.e., always results in a single repair) and whether it increases the expressive power of the extraction language. We give both positive and negative results, some of which are general and some of which apply to policies used in practice.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  ajournal =     "ACM Trans. Database Syst.",
  fjournal =     "ACM Transactions on Database Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J777",
}

@Article{Pham:2016:ISS,
  author =       "Huy Pham and Cyrus Shahabi and Yan Liu",
  title =        "Inferring Social Strength from Spatiotemporal Data",
  journal =      j-TODS,
  volume =       "41",
  number =       "1",
  pages =        "7:1--7:??",
  month =        apr,
  year =         "2016",
  CODEN =        "ATDSD3",
  DOI =          "https://doi.org/10.1145/2877200",
  ISSN =         "0362-5915 (print), 1557-4644 (electronic)",
  ISSN-L =       "0362-5915",
  bibdate =      "Mon Jun 20 11:19:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib",
  abstract =     "The advent of geolocation technologies has generated unprecedented rich datasets of people's location information at a very high fidelity. These location datasets can be used to study human behavior; for example, social studies have shown that people who are seen together frequently at the same place and same time are most probably socially related. In this article, we are interested in inferring these social connections by analyzing people's location information; this is useful in a variety of application domains, from sales and marketing to intelligence analysis. In particular, we propose an entropy-based model (EBM) that not only infers social connections but also estimates the strength of social connections by analyzing people's co-occurrences in space and time. We examine two independent methods: diversity and weighted frequency, through which co-occurrences contribute to the strength of a social connection. In addition, we take the characteristics of each location into consideration in order to compensate for cases where only limited location information is available. We also study the role of location semantics in improving our computation of social strength. We develop a parallel implementation of our algorithm using MapReduce to create a scalable and efficient solution for online applications.
We conducted extensive sets of experiments with real-world datasets including both people's location data and their social connections, where we used the latter as the ground truth to verify the results of applying our approach to the former. We show that our approach is valid across different networks and outperforms the competitors.", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cheung:2016:SBL, author = "Alvin Cheung and Samuel Madden and Armando Solar-Lezama", title = "{Sloth}: Being Lazy Is a Virtue (When Issuing Database Queries)", journal = j-TODS, volume = "41", number = "2", pages = "8:1--8:??", month = jun, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2894749", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:57 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Many web applications store persistent data in databases. During execution, such applications spend a significant amount of time communicating with the database for retrieval and storing of persistent data over the network. These network round-trips represent a significant fraction of the overall execution time for many applications (especially those that issue a lot of database queries) and, as a result, increase their latency. While there has been prior work that aims to eliminate round-trips by batching queries, they are limited by (1) a requirement that developers manually identify batching opportunities, or (2) the fact that they employ static program analysis techniques that cannot exploit many opportunities for batching, as many of these opportunities require knowing precise information about the state of the running program. 
In this article, we present Sloth, a new system that extends traditional lazy evaluation to expose query batching opportunities during application execution, even across loops, branches, and method boundaries.
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Caruccio:2016:SQV, author = "Loredana Caruccio and Giuseppe Polese and Genoveffa Tortora", title = "Synchronization of Queries and Views Upon Schema Evolutions: a Survey", journal = j-TODS, volume = "41", number = "2", pages = "9:1--9:??", month = jun, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2903726", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:57 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "One of the problems arising upon the evolution of a database schema is that some queries and views defined on the previous schema version might no longer work properly. Thus, evolving a database schema entails the redefinition of queries and views to adapt them to the new schema. Although this problem has been mainly raised in the context of traditional information systems, solutions to it are also advocated in other database-related areas, such as Data Integration, Web Data Integration, and Data Warehouses. The problem is a critical one, since industrial organizations often need to adapt their databases and data warehouses to frequent changes in the real world. In this article, we provide a survey of existing approaches and tools to the problem of adapting queries and views upon a database schema evolution; we also propose a classification framework to enable a uniform comparison method among many heterogeneous approaches and tools.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Deng:2016:CMT, author = "Ting Deng and Wenfei Fan and Floris Geerts", title = "Capturing Missing Tuples and Missing Values", journal = j-TODS, volume = "41", number = "2", pages = "10:1--10:??", month = jun, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2901737", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:57 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Databases in real life are often neither entirely closed-world nor entirely open-world. Databases in an enterprise are typically partially closed, in which a part of the data is constrained by master data that contains complete information about the enterprise in certain aspects. It has been shown that, despite missing tuples, such a database may turn out to have complete information for answering a query. This article studies partially closed databases from which both tuples and attribute values may be missing. We specify such a database in terms of conditional tables constrained by master data, referred to as c -instances. We first propose three models to characterize whether a c -instance T is complete for a query Q relative to master data. That is, depending on how missing values in T are instantiated, the answer to Q in T remains unchanged when new tuples are added. We then investigate three problems, to determine (a) whether a given c -instance is complete for a query Q, (b) whether there exists a c -instance that is complete for Q relative to master data available, and (c) whether a c -instance is a minimal-size database that is complete for Q. 
We establish matching lower and upper bounds on these problems for queries expressed in a variety of languages in each of the three models for specifying relative completeness.", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Dutt:2016:PBF, author = "Anshuman Dutt and Jayant R. Haritsa", title = "Plan Bouquets: a Fragrant Approach to Robust Query Processing", journal = j-TODS, volume = "41", number = "2", pages = "11:1--11:??", month = jun, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2901738", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:57 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Identifying efficient execution plans for declarative OLAP queries typically entails estimation of several predicate selectivities. In practice, these estimates often differ significantly from the values actually encountered during query execution, leading to poor plan choices and grossly inflated response times. We propose here a conceptually new approach to address this classical problem, wherein the compile-time estimation process is completely eschewed for error-prone selectivities. Instead, from the set of optimal plans in the query's selectivity error space, a limited subset, called the ``plan bouquet,'' is selected such that at least one of the bouquet plans is 2-optimal at each location in the space. Then, at run time, a sequence of cost-budgeted executions from the plan bouquet is carried out, eventually finding a plan that executes to completion within its assigned budget. The duration and switching of these executions is controlled by a graded progression of isosurfaces projected onto the optimal performance profile. 
We prove that this construction results, for the first time, in guarantees on worst-case performance sub-optimality. Moreover, it ensures repeatable execution strategies across different invocations of a query. We then present a suite of enhancements to the basic plan bouquet algorithm, including randomized variants, that result in significantly stronger performance guarantees. An efficient isosurface identification algorithm is also introduced to curtail the bouquet construction overheads. The plan bouquet approach has been empirically evaluated on both PostgreSQL and a commercial DBMS, over the TPC-H and TPC-DS benchmark environments. Our experimental results indicate that it delivers substantial improvements in the worst-case behavior, without impairing the average-case performance, as compared to the native optimizers of these systems. Moreover, it can be implemented using existing optimizer infrastructure, making it relatively easy to incorporate in current database engines. Overall, the plan bouquet approach provides novel performance guarantees that open up new possibilities for robust query processing.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jung:2016:RWR, author = "Jinhong Jung and Kijung Shin and Lee Sael and U. Kang", title = "Random Walk with Restart on Large Graphs Using Block Elimination", journal = j-TODS, volume = "41", number = "2", pages = "12:1--12:??", month = jun, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2901736", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:57 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Given a large graph, how can we calculate the relevance between nodes fast and accurately? 
Random walk with restart (RWR) provides a good measure for this purpose and has been applied to diverse data mining applications including ranking, community detection, link prediction, and anomaly detection. Since calculating RWR from scratch takes a long time, various preprocessing methods, most of which are related to inverting adjacency matrices, have been proposed to speed up the calculation. However, these methods do not scale to large graphs because they usually produce large dense matrices that do not fit into memory. In addition, the existing methods are inappropriate when graphs dynamically change because the expensive preprocessing task needs to be computed repeatedly. In this article, we propose B ear, a fast, scalable, and accurate method for computing RWR on large graphs. Bear has two versions: a preprocessing method BearS for static graphs and an incremental update method BearD for dynamic graphs. BearS consists of the preprocessing step and the query step. In the preprocessing step, BearS reorders the adjacency matrix of a given graph so that it contains a large and easy-to-invert submatrix, and precomputes several matrices including the Schur complement of the submatrix. In the query step, BearS quickly computes the RWR scores for a given query node using a block elimination approach with the matrices computed in the preprocessing step. For dynamic graphs, BearD efficiently updates the changed parts in the preprocessed matrices of BearS based on the observation that only small parts of the preprocessed matrices change when few edges are inserted or deleted. Through extensive experiments, we show that BearS significantly outperforms other state-of-the-art methods in terms of preprocessing and query speed, space efficiency, and accuracy. We also show that BearD quickly updates the preprocessed matrices and immediately computes queries when the graph changes.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Arenas:2016:ECF, author = "Marcelo Arenas and Gonzalo I. Diaz", title = "The Exact Complexity of the First-Order Logic Definability Problem", journal = j-TODS, volume = "41", number = "2", pages = "13:1--13:??", month = jun, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2886095", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:57 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study the definability problem for first-order logic, denoted by FO-D ef. The input of FO-Def is a relational database instance I and a relation R; the question to answer is whether there exists a first-order query Q (or, equivalently, a relational algebra expression Q ) such that Q evaluated on I gives R as an answer. Although the study of FO-D ef dates back to 1978, when the decidability of this problem was shown, the exact complexity of FO-Def remains as a fundamental open problem. In this article, we provide a polynomial-time algorithm for solving FO-Def that uses calls to a graph-isomorphism subroutine (or oracle). As a consequence, the first-order definability problem is found to be complete for the class GI of all problems that are polynomial-time Turing reducible to the graph isomorphism problem, thus closing the open question about the exact complexity of this problem. The technique used is also applied to a generalized version of the problem that accepts a finite set of relation pairs, and whose exact complexity was also open; this version is also found to be GI -complete.", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cohen:2016:CLT, author = "Sara Cohen and Yaacov Y. Weiss", title = "The Complexity of Learning Tree Patterns from Example Graphs", journal = j-TODS, volume = "41", number = "2", pages = "14:1--14:??", month = jun, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2890492", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:57 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article investigates the problem of learning tree patterns that return nodes with a given set of labels, from example graphs provided by the user. Example graphs are annotated by the user as being either positive or negative. The goal is then to determine whether there exists a tree pattern returning tuples of nodes with the given labels in each of the positive examples, but in none of the negative examples, and furthermore, to find one such pattern if it exists. These are called the satisfiability and learning problems, respectively. This article thoroughly investigates the satisfiability and learning problems in a variety of settings. In particular, we consider example sets that (1) may contain only positive examples, or both positive and negative examples, (2) may contain directed or undirected graphs, and (3) may have multiple occurrences of labels or be uniquely labeled (to some degree). In addition, we consider tree patterns of different types that can allow, or prohibit, wildcard labeled nodes and descendant edges. We also consider two different semantics for mapping tree patterns to graphs. The complexity of satisfiability is determined for the different combinations of settings. For cases in which satisfiability is polynomial, it is also shown that learning is polynomial. 
(This is nontrivial as satisfying patterns may be exponential in size.) Finally, the minimal learning problem, that is, that of finding a minimal-sized satisfying pattern, is studied for cases in which satisfiability is polynomial.", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bailis:2016:SAV, author = "Peter Bailis and Alan Fekete and Ali Ghodsi and Joseph M. Hellerstein and Ion Stoica", title = "Scalable Atomic Visibility with {RAMP} Transactions", journal = j-TODS, volume = "41", number = "3", pages = "15:1--15:??", month = aug, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2909870", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:58 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Databases can provide scalability by partitioning data across several servers. However, multipartition, multioperation transactional access is often expensive, employing coordination-intensive locking, validation, or scheduling mechanisms. Accordingly, many real-world systems avoid mechanisms that provide useful semantics for multipartition operations. This leads to incorrect behavior for a large class of applications including secondary indexing, foreign key enforcement, and materialized view maintenance. In this work, we identify a new isolation model-Read Atomic (RA) isolation-that matches the requirements of these use cases by ensuring atomic visibility: either all or none of each transaction's updates are observed by other transactions. 
We present algorithms for Read Atomic Multipartition (RAMP) transactions that enforce atomic visibility while offering excellent scalability, guaranteed commit despite partial failures (via coordination-free execution), and minimized communication between servers (via partition independence). These RAMP transactions correctly mediate atomic visibility of updates and provide readers with snapshot access to database state by using limited multiversioning and by allowing clients to independently resolve nonatomic reads. We demonstrate that, in contrast with existing algorithms, RAMP transactions incur limited overhead --- even under high contention --- and scale linearly to 100 servers.
This article aims precisely at reconciling individual's privacy on one side and global benefits for the community and business perspectives on the other. It promotes the idea of pushing the security to secure hardware devices controlling the data at the place of their acquisition. Thanks to these tangible physical elements of trust, secure distributed querying protocols can reestablish the capacity to perform global computations, such as Structured Query Language (SQL) aggregates, without revealing any sensitive information to central servers. This article studies how to secure the execution of such queries in the presence of honest-but-curious and malicious attackers. It also discusses how the resulting querying protocols can be integrated in a concrete decentralized architecture. Cost models and experiments on SQL/Asymmetric Architecture (AA), our distributed prototype running on real tamper-resistant hardware, demonstrate that this approach can scale to nationwide applications.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Burdick:2016:DFL, author = "Douglas Burdick and Ronald Fagin and Phokion G. Kolaitis and Lucian Popa and Wang-Chiew Tan", title = "A Declarative Framework for Linking Entities", journal = j-TODS, volume = "41", number = "3", pages = "17:1--17:??", month = aug, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2894748", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:58 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We introduce and develop a declarative framework for entity linking and, in particular, for entity resolution. As in some earlier approaches, our framework is based on a systematic use of constraints. 
However, the constraints we adopt are link-to-source constraints, unlike in earlier approaches where source-to-link constraints were used to dictate how to generate links. Our approach makes it possible to focus entirely on the intended properties of the outcome of entity linking, thus separating the constraints from any procedure of how to achieve that outcome. The core language consists of link-to-source constraints that specify the desired properties of a link relation in terms of source relations and built-in predicates such as similarity measures. A key feature of the link-to-source constraints is that they employ disjunction, which enables the declarative listing of all the reasons two entities should be linked. We also consider extensions of the core language that capture collective entity resolution by allowing interdependencies among the link relations. We identify a class of ``good'' solutions for entity-linking specifications, which we call maximum-value solutions and which capture the strength of a link by counting the reasons that justify it. We study natural algorithmic problems associated with these solutions, including the problem of enumerating the ``good'' solutions and the problem of finding the certain links, which are the links that appear in every ``good'' solution. We show that these problems are tractable for the core language but may become intractable once we allow interdependencies among the link relations. We also make some surprising connections between our declarative framework, which is deterministic, and probabilistic approaches such as ones based on Markov Logic Networks.", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bourhis:2016:BRR, author = "Pierre Bourhis and Gabriele Puppis and Cristian Riveros and Slawek Staworko", title = "Bounded Repairability for Regular Tree Languages", journal = j-TODS, volume = "41", number = "3", pages = "18:1--18:??", month = aug, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2898995", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:58 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study the problem of bounded repairability of a given restriction tree language R into a target tree language T. More precisely, we say that R is bounded repairable with respect to T if there exists a bound on the number of standard tree editing operations necessary to apply to any tree in R to obtain a tree in T. We consider a number of possible specifications for tree languages: bottom-up tree automata (on curry encoding of unranked trees) that capture the class of XML schemas and document type definitions (DTDs). We also consider a special case when the restriction language R is universal (i.e., contains all trees over a given alphabet). We give an effective characterization of bounded repairability between pairs of tree languages represented with automata. This characterization introduces two tools-synopsis trees and a coverage relation between them-allowing one to reason about tree languages that undergo a bounded number of editing operations. We then employ this characterization to provide upper bounds to the complexity of deciding bounded repairability and show that these bounds are tight. In particular, when the input tree languages are specified with arbitrary bottom-up automata, the problem is coNExp-complete. 
The problem remains coNExp-complete even if we use deterministic nonrecursive DTDs to specify the input languages. The complexity of the problem can be reduced if we assume that the alphabet, the set of node labels, is fixed: the problem becomes PS pace-complete for nonrecursive DTDs and coNP-complete for deterministic nonrecursive DTDs. Finally, when the restriction tree language R is universal, we show that the bounded repairability problem becomes Exp-complete if the target language is specified by an arbitrary bottom-up tree automaton and becomes tractable (P-complete, in fact) when a deterministic bottom-up automaton is used.", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bender:2016:BTC, author = "Michael A. Bender and Roozbeh Ebrahimi and Haodong Hu and Bradley C. Kuszmaul", title = "{B}-Trees and Cache-Oblivious {B}-Trees with Different-Sized Atomic Keys", journal = j-TODS, volume = "41", number = "3", pages = "19:1--19:??", month = aug, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2907945", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:58 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Most B-tree articles assume that all $N$ keys have the same size $K$, that $ f = B / K$ keys fit in a disk block, and therefore that the search cost is $ O(\log_{f + 1} N)$ block transfers. When keys have variable size, B-tree operations have no nontrivial performance guarantees, however. This article provides B-tree-like performance guarantees on dictionaries that contain keys of different sizes in a model in which keys must be stored and compared as opaque objects. 
The resulting atomic-key dictionaries exhibit performance bounds in terms of the average key size and match the bounds when all keys are the same size. Atomic-key dictionaries can be built with minimal modification to the B-tree structure, simply by choosing the pivot keys properly. This article describes both static and dynamic atomic-key dictionaries. In the static case, if there are N keys with average size K, the search cost is $ O(\lceil K / B \rceil \log_{1 + \lceil B / K \rceil } N)$ expected transfers. It is not possible to transform these expected bounds into worst-case bounds. The cost to build the tree is $ O(N K)$ operations and $ O(N K / B)$ transfers if all keys are presented in sorted order. If not, the cost is the sorting cost. For the dynamic dictionaries, the amortized cost to insert a key $ \kappa $ of arbitrary length at an arbitrary rank is dominated by the cost to search for $ \kappa $. Specifically, the amortized cost to insert a key $ \kappa $ of arbitrary length and random rank is $ O(\lceil K / B \rceil \log_{1 + \lceil B / K \rceil } N + | \kappa | / B)$ transfers. A dynamic-programming algorithm is shown for constructing a search tree with minimal expected cost. This article also gives a cache-oblivious static atomic-key B-tree, which achieves the same asymptotic performance as the static B-tree dictionary, mentioned previously. A cache-oblivious data structure or algorithm is not parameterized by the block size $B$ or memory size $M$ in the memory hierarchy; rather, it is universal, working simultaneously for all possible values of $B$ or $M$. On a machine with block size $B$, if there are $N$ keys with average size $K$, search operations costs $ O(\lceil K / B \rceil \log_{1 + \lceil B / K \rceil }N)$ block transfers in expectation. This cache-oblivious layout can be built in $ O(N \log (N K))$ processor operations.", acknowledgement = ack-nhfb, articleno = "19", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Mazowiecki:2016:MDR, author = "Filip Mazowiecki and Filip Murlak and Adam Witkowski", title = "Monadic {Datalog} and Regular Tree Pattern Queries", journal = j-TODS, volume = "41", number = "3", pages = "20:1--20:??", month = aug, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2925986", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:58 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Containment of monadic datalog programs over trees is decidable. The situation is more complex when tree nodes carry labels from an infinite alphabet that can be tested for equality. It then matters whether the descendant relation is allowed or not: the descendant relation can be eliminated easily from monadic programs only when label equalities are not used. With descendant, even containment of linear monadic programs in unions of conjunctive queries is undecidable, and positive results are known only for bounded-depth trees. We show that without descendant, containment of connected monadic programs is decidable over ranked trees, but over unranked trees it is so only for linear programs. With descendant, it becomes decidable over unranked trees under restriction to downward programs: each rule only moves down from the node in the head. This restriction is motivated by regular tree pattern queries, a recent formalism in the area of ActiveXML, which we show to be equivalent to linear downward programs.", acknowledgement = ack-nhfb, articleno = "20", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Tian:2016:BHW, author = "Yuanyuan Tian and Fatma {\"O}zcan and Tao Zou and Romulo Goncalves and Hamid Pirahesh", title = "Building a Hybrid Warehouse: Efficient Joins between Data Stored in {HDFS} and Enterprise Warehouse", journal = j-TODS, volume = "41", number = "4", pages = "21:1--21:??", month = dec, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2972950", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:58 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The Hadoop Distributed File System (HDFS) has become an important data repository in the enterprise as the center for all business analytics, from SQL queries and machine learning to reporting. At the same time, enterprise data warehouses (EDWs) continue to support critical business analytics. This has created the need for a new generation of a special federation between Hadoop-like big data platforms and EDWs, which we call the hybrid warehouse. There are many applications that require correlating data stored in HDFS with EDW data, such as the analysis that associates click logs stored in HDFS with the sales data stored in the database. All existing solutions reach out to HDFS and read the data into the EDW to perform the joins, assuming that the Hadoop side does not have efficient SQL support. In this article, we show that it is actually better to do most data processing on the HDFS side, provided that we can leverage a sophisticated execution engine for joins on the Hadoop side. We identify the best hybrid warehouse architecture by studying various algorithms to join database and HDFS tables. 
We utilize Bloom filters to minimize the data movement and exploit the massive parallelism in both systems to the fullest extent possible. We describe a new zigzag join algorithm and show that it is a robust join algorithm for hybrid warehouses that performs well in almost all cases. We further develop a sophisticated cost model for the various join algorithms and show that it can facilitate query optimization in the hybrid warehouse to correctly choose the right algorithm under different predicate and join selectivities.", acknowledgement = ack-nhfb, articleno = "21", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Khamis:2016:JGR, author = "Mahmoud Abo Khamis and Hung Q. Ngo and Christopher R{\'e} and Atri Rudra", title = "Joins via Geometric Resolutions: Worst Case and Beyond", journal = j-TODS, volume = "41", number = "4", pages = "22:1--22:??", month = dec, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2967101", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:58 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We present a simple geometric framework for the relational join. Using this framework, we design an algorithm that achieves the fractional hypertree-width bound, which generalizes classical and recent worst-case algorithmic results on computing joins. In addition, we use our framework and the same algorithm to show a series of what are colloquially known as beyond worst-case results. The framework allows us to prove results for data stored in BTrees, multidimensional data structures, and even multiple indices per table. 
A key idea in our framework is formalizing the inference one does with an index as a type of geometric resolution, transforming the algorithmic problem of computing joins to a geometric problem. Our notion of geometric resolution can be viewed as a geometric analog of logical resolution. In addition to the geometry and logic connections, our algorithm can also be thought of as backtracking search with memoization.", acknowledgement = ack-nhfb, articleno = "22", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Papadimitriou:2016:GBA, author = "Dimitra Papadimitriou and Georgia Koutrika and John Mylopoulos and Yannis Velegrakis", title = "The Goal Behind the Action: Toward Goal-Aware Systems and Applications", journal = j-TODS, volume = "41", number = "4", pages = "23:1--23:??", month = dec, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2934666", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:58 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Human activity is almost always intentional, be it in a physical context or as part of an interaction with a computer system. By understanding why user-generated events are happening and what purposes they serve, a system can offer a significantly improved and more engaging experience. However, goals cannot be easily captured. Analyzing user actions such as clicks and purchases can reveal patterns and behaviors, but understanding the goals behind these actions is a different and challenging issue. Our work presents a unified, multidisciplinary viewpoint for goal management that covers many different cases where goals can be used and techniques with which they can be exploited. 
Our purpose is to provide a common reference point to the concepts and challenging tasks that need to be formally defined when someone wants to approach a data analysis problem from a goal-oriented point of view. This work also serves as a springboard to discuss several open challenges and opportunities for goal-oriented approaches in data management, analysis, and sharing systems and applications.", acknowledgement = ack-nhfb, articleno = "23", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fazzinga:2016:EIC, author = "Bettina Fazzinga and Sergio Flesca and Filippo Furfaro and Francesco Parisi", title = "Exploiting Integrity Constraints for Cleaning Trajectories of {RFID}-Monitored Objects", journal = j-TODS, volume = "41", number = "4", pages = "24:1--24:??", month = dec, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2939368", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:58 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A probabilistic framework for cleaning the data collected by Radio-Frequency IDentification (RFID) tracking systems is introduced. What has to be cleaned is the set of trajectories that are the possible interpretations of the readings: a trajectory in this set is a sequence whose generic element is a location covered by the reader(s) that made the detection at the corresponding time point. The cleaning is guided by integrity constraints and consists of discarding the inconsistent trajectories and assigning to the others a suitable probability of being the actual one. The probabilities are evaluated by adopting probabilistic conditioning that logically consists of the following steps. 
First, the trajectories are assigned a priori probabilities that rely on the independence assumption between the time points. Then, these probabilities are revised according to the spatio-temporal correlations encoded by the constraints. This is done by conditioning the a priori probability of each trajectory to the event that the constraints are satisfied: this means taking the ratio of this a priori probability to the sum of the a priori probabilities of all the consistent trajectories. Instead of performing these steps by materializing all the trajectories and their a priori probabilities (which is infeasible, owing to the typically huge number of trajectories), our approach exploits a data structure called conditioned trajectory graph (ct-graph) that compactly represents the trajectories and their conditioned probabilities, and an algorithm for efficiently constructing the ct-graph, which progressively builds it while avoiding the construction of components encoding inconsistent trajectories.", acknowledgement = ack-nhfb, articleno = "24", ajournal = "ACM Trans. 
the skycube of $T$ is the set of skylines with respect to all nonempty subsets (subspaces)
On the other hand, when the topmost skyline is large relative to the size of the input table, it turns out that functional dependencies, a fundamental concept in databases, uncover a monotonic property between skylines. Equipped with this information, we show that closed attributes sets are fundamental for partial and full skycube materialization. Extensive experiments with real and synthetic datasets show that our solutions generally outperform state-of-the-art algorithms.", acknowledgement = ack-nhfb, articleno = "25", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Dignos:2016:EKR, author = "Anton Dign{\"o}s and Michael H. B{\"o}hlen and Johann Gamper and Christian S. Jensen", title = "Extending the Kernel of a Relational {DBMS} with Comprehensive Support for Sequenced Temporal Queries", journal = j-TODS, volume = "41", number = "4", pages = "26:1--26:??", month = dec, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2967608", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:58 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Many databases contain temporal, or time-referenced, data and use intervals to capture the temporal aspect. While SQL-based database management systems (DBMSs) are capable of supporting the management of interval data, the support they offer can be improved considerably. A range of proposed temporal data models and query languages offer ample evidence to this effect. Natural queries that are very difficult to formulate in SQL are easy to formulate in these temporal query languages. The increased focus on analytics over historical data where queries are generally more complex exacerbates the difficulties and thus the potential benefits of a temporal query language. 
Commercial DBMSs have recently started to offer limited temporal functionality in a step-by-step manner, focusing on the representation of intervals and neglecting the implementation of the query evaluation engine. This article demonstrates how it is possible to extend the relational database engine to achieve a full-fledged, industrial-strength implementation of sequenced temporal queries, which intuitively are queries that are evaluated at each time point. Our approach reduces temporal queries to nontemporal queries over data with adjusted intervals, and it leaves the processing of nontemporal queries unaffected. Specifically, the approach hinges on three concepts: interval adjustment, timestamp propagation, and attribute scaling. Interval adjustment is enabled by introducing two new relational operators, a temporal normalizer and a temporal aligner, and the latter two concepts are enabled by the replication of timestamp attributes and the use of so-called scaling functions. By providing a set of reduction rules, we can transform any temporal query, expressed in terms of temporal relational operators, to a query expressed in terms of relational operators and the two new operators. We prove that the size of a transformed query is linear in the number of temporal operators in the original query. An integration of the new operators and the transformation rules, along with query optimization rules, into the kernel of PostgreSQL is reported. Empirical studies with the resulting temporal DBMS are covered that offer insights into pertinent design properties of the article's proposal. The new system is available as open-source software.", acknowledgement = ack-nhfb, articleno = "26", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bourhis:2016:GBD, author = "Pierre Bourhis and Marco Manna and Michael Morak and Andreas Pieris", title = "Guarded-Based Disjunctive Tuple-Generating Dependencies", journal = j-TODS, volume = "41", number = "4", pages = "27:1--27:??", month = dec, year = "2016", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2976736", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:58 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We perform an in-depth complexity analysis of query answering under guarded-based classes of disjunctive tuple-generating dependencies (DTGDs), focusing on (unions of) conjunctive queries ((U)CQs). We show that the problem under investigation is very hard, namely 2ExpTime-complete, even for fixed sets of dependencies of a very restricted form. This is a surprising lower bound that demonstrates the enormous impact of disjunction on query answering under guarded-based tuple-generating dependencies, and also reveals the source of complexity for expressive logics such as the guarded fragment of first-order logic. We then proceed to investigate whether prominent subclasses of (U)CQs (i.e., queries of bounded treewidth and hypertree-width, and acyclic queries) have a positive impact on the complexity of the problem under consideration. We show that queries of bounded treewidth and bounded hypertree-width do not reduce the complexity of our problem, even if we focus on predicates of bounded arity or on fixed sets of DTGDs. Regarding acyclic queries, although the problem remains 2ExpTime-complete in general, in some relevant settings the complexity reduces to ExpTime-complete. Finally, with the aim of identifying tractable cases, we focus our attention on atomic queries. 
we obtain a Ptime-completeness if we focus on predicates of bounded arity, and AC$^0$-membership when the set of dependencies and the query are fixed.
We also separate the power of decision-DNNFs from d-DNNFs and a generalization of decision-DNNFs known as AND-FBDDs.
pages = "1e:1--1e:??",
These include offline feature extraction and model building as well as a framework for online anomaly detection that we propose. Second, since obtaining real smart meter data is difficult due to privacy issues, we present an algorithm for generating large realistic datasets from a small seed of real data. Third, we implement the proposed benchmark using five representative platforms: a traditional numeric computing platform (Matlab), a relational DBMS with a built-in machine learning toolkit (PostgreSQL/MADlib), a main-memory column store (``System C''), and two distributed data processing platforms (Hive and Spark/Spark Streaming). We compare the five platforms in terms of application development effort and performance on a multicore machine as well as a cluster of 16 commodity servers.", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Currim:2017:DMM, author = "Sabah Currim and Richard T. Snodgrass and Young-Kyoon Suh and Rui Zhang", title = "{DBMS} Metrology: Measuring Query Time", journal = j-TODS, volume = "42", number = "1", pages = "3:1--3:??", month = mar, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2996454", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:59 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "It is surprisingly hard to obtain accurate and precise measurements of the time spent executing a query because there are many sources of variance. To understand these sources, we review relevant per-process and overall measures obtainable from the Linux kernel and introduce a structural causal model relating these measures. A thorough correlational analysis provides strong support for this model. 
We attempted to determine why a particular measurement wasn't repeatable and then to devise ways to eliminate or reduce that variance. This enabled us to articulate a timing protocol that applies to proprietary DBMSes, that ensures the repeatability of a query, and that obtains a quite accurate query execution time while dropping very few outliers. This resulting query time measurement procedure, termed the Tucson Timing Protocol Version 2 (TTPv2), consists of the following steps: (i) perform sanity checks to ensure data validity; (ii) drop some query executions via clearly motivated predicates; (iii) drop some entire queries at a cardinality, again via clearly motivated predicates; (iv) for those that remain, compute a single measured time by a carefully justified formula over the underlying measures of the remaining query executions; and (v) perform post-analysis sanity checks. The result is a mature, general, robust, self-checking protocol that provides a more precise and more accurate timing of the query. The protocol is also applicable to other operating domains in which measurements of multiple processes each doing computation and I/O is needed.", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Wu:2017:CFC, author = "You Wu and Pankaj K. Agarwal and Chengkai Li and Jun Yang and Cong Yu", title = "Computational Fact Checking through Query Perturbations", journal = j-TODS, volume = "42", number = "1", pages = "4:1--4:??", month = mar, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/2996453", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Mar 2 18:02:59 MST 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Our media is saturated with claims of ``facts'' made from data. 
This framework allows us to formulate practical fact-checking tasks---reverse-engineering vague claims, and countering questionable claims---as computational problems.
the performance of many state-of-the-art techniques---including LEMP---on multiple real-world datasets.
we present UniAD, which stands for Unified execution for Ad hoc Data processing, a system designed to simplify the programming of data processing tasks and provide efficient execution for user programs.
concerns the correctness of several results in reasoning about differential dependencies (dds), originally reported in Song and Chen [2011].
For these reasons, infeasible differential functions are not considered [Song and Chen 2011] and the results in Song and Chen [2011] are correct, in contrast to what is claimed in Vincent et al. [2015].", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Paparrizos:2017:FAT, author = "John Paparrizos and Luis Gravano", title = "Fast and Accurate Time-Series Clustering", journal = j-TODS, volume = "42", number = "2", pages = "8:1--8:??", month = jun, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3044711", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 23 16:29:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The proliferation and ubiquity of temporal data across many disciplines has generated substantial interest in the analysis and mining of time series. Clustering is one of the most popular data-mining methods, not only due to its exploratory power but also because it is often a preprocessing step or subroutine for other techniques. In this article, we present $k$-Shape and $k$-MultiShapes ($k$-MS), two novel algorithms for time-series clustering. $k$-Shape and $k$-MS rely on a scalable iterative refinement procedure. As their distance measure, $k$-Shape and $k$-MS use shape-based distance (SBD), a normalized version of the cross-correlation measure, to consider the shapes of time series while comparing them. Based on the properties of SBD, we develop two new methods, namely ShapeExtraction (SE) and MultiShapesExtraction (MSE), to compute cluster centroids that are used in every iteration to update the assignment of time series to clusters. $k$-Shape relies on SE to compute a single centroid per cluster based on all time series in each cluster. 
In contrast, $k$-MS relies on MSE to compute multiple centroids per cluster to account for the proximity and spatial distribution of time series in each cluster. To demonstrate the robustness of SBD, $k$-Shape, and $k$-MS, we perform an extensive experimental evaluation on 85 datasets against state-of-the-art distance measures and clustering methods for time series using rigorous statistical analysis. SBD, our efficient and parameter-free distance measure, achieves similar accuracy to Dynamic Time Warping (DTW), a highly accurate but computationally expensive distance measure that requires parameter tuning. For clustering, we compare $k$-Shape and $k$-MS against scalable and non-scalable partitional, hierarchical, spectral, density-based, and shapelet-based methods, with combinations of the most competitive distance measures. $k$-Shape outperforms all scalable methods in terms of accuracy. Furthermore, $k$-Shape also outperforms all non-scalable approaches, with one exception, namely $k$-medoids with DTW, which achieves similar accuracy. However, unlike $k$-Shape, this approach requires tuning of its distance measure and is significantly slower than $k$-Shape. $k$-MS performs similarly to $k$-Shape in comparison to rival methods, but $k$-MS is significantly more accurate than $k$-Shape. Beyond clustering, we demonstrate the effectiveness of $k$-Shape to reduce the search space of one-nearest-neighbor classifiers for time series. Overall, SBD, $k$-Shape, and $k$-MS emerge as domain-independent, highly accurate, and efficient methods for time-series comparison and clustering with broad applications.", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Koutris:2017:CQA, author = "Paraschos Koutris and Jef Wijsen", title = "Consistent Query Answering for Self-Join-Free Conjunctive Queries Under Primary Key Constraints", journal = j-TODS, volume = "42", number = "2", pages = "9:1--9:??", month = jun, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3068334", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 23 16:29:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A relational database is said to be uncertain if primary key constraints can possibly be violated. A repair (or possible world) of an uncertain database is obtained by selecting a maximal number of tuples without ever selecting two distinct tuples with the same primary key value. For any Boolean query $q$, CERTAINTY($q$) is the problem that takes an uncertain database db as input and asks whether $q$ is true in every repair of db. The complexity of this problem has been particularly studied for $q$ ranging over the class of self-join-free Boolean conjunctive queries. A research challenge is to determine, given $q$, whether CERTAINTY($q$) belongs to complexity classes FO, $P$, or coNP-complete. In this article, we combine existing techniques for studying this complexity classification task. We show that, for any self-join-free Boolean conjunctive query $q$, it can be decided whether or not CERTAINTY($q$) is in FO. We additionally show how to construct a single SQL query for solving CERTAINTY($q$) if it is in FO. Further, for any self-join-free Boolean conjunctive query $q$, CERTAINTY($q$) is either in $P$ or coNP-complete and the complexity dichotomy is effective. 
This settles a research question that has been open for 10 years.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Yu:2017:ODM, author = "Yanwei Yu and Lei Cao and Elke A. Rundensteiner and Qin Wang", title = "Outlier Detection over Massive-Scale Trajectory Streams", journal = j-TODS, volume = "42", number = "2", pages = "10:1--10:??", month = jun, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3013527", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 23 16:29:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The detection of abnormal moving objects over high-volume trajectory streams is critical for real-time applications ranging from military surveillance to transportation management. Yet this outlier detection problem, especially along both the spatial and temporal dimensions, remains largely unexplored. In this work, we propose a rich taxonomy of novel classes of neighbor-based trajectory outlier definitions that model the anomalous behavior of moving objects for a large range of real-time applications. Our theoretical analysis and empirical study on two real-world datasets --- the Beijing Taxi trajectory data and the Ground Moving Target Indicator data stream --- and one generated Moving Objects dataset demonstrate the effectiveness of our taxonomy in effectively capturing different types of abnormal moving objects. Furthermore, we propose a general strategy for efficiently detecting these new outlier classes called the {\em minimal examination\/} (MEX) framework. The MEX framework features three core optimization principles, which leverage spatiotemporal as well as the predictability properties of the neighbor evidence to minimize the detection costs.
Based on this foundation, we design algorithms that detect the outliers based on these classes of new outlier semantics that successfully leverage our optimization principles. Our comprehensive experimental study demonstrates that our proposed MEX strategy drives the detection costs 100-fold down into the practical realm for applications that analyze high-volume trajectory streams in near real time.", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Han:2017:CCF, author = "Yunheng Han and Weiwei Sun and Baihua Zheng", title = "{COMPRESS}: a Comprehensive Framework of Trajectory Compression in Road Networks", journal = j-TODS, volume = "42", number = "2", pages = "11:1--11:??", month = jun, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3015457", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 23 16:29:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "More and more advanced technologies have become available to collect and integrate an unprecedented amount of data from multiple sources, including GPS trajectories about the traces of moving objects. Given the fact that GPS trajectories are vast in size while the information carried by the trajectories could be redundant, we focus on trajectory compression in this article. As a systematic solution, we propose a comprehensive framework, namely, COMPRESS (Comprehensive Paralleled Road-Network-Based Trajectory Compression), to compress GPS trajectory data in an urban road network. In the preprocessing step, COMPRESS decomposes trajectories into spatial paths and temporal sequences, with a thorough justification for trajectory decomposition. 
In the compression step, COMPRESS performs spatial compression on spatial paths, and temporal compression on temporal sequences in parallel. It introduces two alternative algorithms with different strengths for lossless spatial compression and designs lossy but error-bounded algorithms for temporal compression. It also presents query processing algorithms to support error-bounded location-based queries on compressed trajectories without full decompression. All algorithms under COMPRESS are efficient and have the time complexity of $ O (| T |) $, where $ | T | $ is the size of the input trajectory $T$. We have also conducted a comprehensive experimental study to demonstrate the effectiveness of COMPRESS, whose compression ratio is significantly better than related approaches.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{TenCate:2017:AAS, author = "Balder {Ten Cate} and Phokion G. Kolaitis and Kun Qian and Wang-Chiew Tan", title = "Approximation Algorithms for Schema-Mapping Discovery from Data Examples", journal = j-TODS, volume = "42", number = "2", pages = "12:1--12:??", month = jun, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3044712", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 23 16:29:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In recent years, data examples have been at the core of several different approaches to schema-mapping design. In particular, Gottlob and Senellart introduced a framework for schema-mapping discovery from a single data example, in which the derivation of a schema mapping is cast as an optimization problem. Our goal is to refine and study this framework in more depth. 
Among other results, we design a polynomial-time $\log(n)$-approximation algorithm for computing optimal schema mappings from a given set of data examples (where $n$ is the combined size of the given data examples) for a restricted class of schema mappings; moreover, we show that this approximation ratio cannot be improved. In addition to the complexity-theoretic results, we implemented the aforementioned $\log(n)$-approximation algorithm and carried out an experimental evaluation in a real-world mapping scenario.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Maniu:2017:IFQ, author = "Silviu Maniu and Reynold Cheng and Pierre Senellart", title = "An Indexing Framework for Queries on Probabilistic Graphs", journal = j-TODS, volume = "42", number = "2", pages = "13:1--13:??", month = jun, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3044713", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Jun 23 16:29:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Information in many applications, such as mobile wireless systems, social networks, and road networks, is captured by graphs. In many cases, such information is uncertain. We study the problem of querying a probabilistic graph, in which vertices are connected to each other probabilistically. In particular, we examine ``source-to-target'' queries (ST-queries), such as computing the shortest path between two vertices. The major difference with the deterministic setting is that query answers are enriched with probabilistic annotations. 
Evaluating ST-queries over probabilistic graphs is \#P-hard, as it requires examining an exponential number of ``possible worlds'' --- database instances generated from the probabilistic graph. Existing solutions to the ST-query problem, which sample possible worlds, have two downsides: (i) a possible world can be very large and (ii) many samples are needed for reasonable accuracy. To tackle these issues, we study the ProbTree, a data structure that stores a succinct, or indexed, version of the possible worlds of the graph. Existing ST-query solutions are executed on top of this structure, with the number of samples and sizes of the possible worlds reduced. We examine lossless and lossy methods for generating the ProbTree, which reflect the tradeoff between the accuracy and efficiency of query evaluation. We analyze the correctness and complexity of these approaches. Our extensive experiments on real datasets show that the ProbTree is fast to generate and small in size. It also enhances the accuracy and efficiency of existing ST-query algorithms significantly.", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Gan:2017:HAE, author = "Junhao Gan and Yufei Tao", title = "On the Hardness and Approximation of {Euclidean} {DBSCAN}", journal = j-TODS, volume = "42", number = "3", pages = "14:1--14:??", month = aug, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3083897", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Aug 24 16:30:15 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "DBSCAN is a method proposed in 1996 for clustering multi-dimensional points, and has received extensive applications. Its computational hardness is still unsolved to this date.
The original KDD'96 paper claimed an algorithm of $ O(n \log n) $ ``average runtime complexity'' (where n is the number of data points) without a rigorous proof. In 2013, a genuine $ O(n \log n) $-time algorithm was found in 2D space under Euclidean distance. The hardness of dimensionality $ d \geq 3$ has remained open ever since. This article considers the problem of computing DBSCAN clusters from scratch (assuming no existing indexes) under Euclidean distance. We prove that, for $ d \geq 3$, the problem requires $ \Omega (n^{4 / 3}) $ time to solve, unless very significant breakthroughs --- ones widely believed to be impossible --- could be made in theoretical computer science. Motivated by this, we propose a relaxed version of the problem called $ \rho $-approximate DBSCAN, which returns the same clusters as DBSCAN, unless the clusters are ``unstable'' (i.e., they change once the input parameters are slightly perturbed). The $ \rho $-approximate problem can be settled in $ O(n) $ expected time regardless of the constant dimensionality d. The article also enhances the previous result on the exact DBSCAN problem in 2D space. We show that, if the n data points have been pre-sorted on each dimension (i.e., one sorted list per dimension), the problem can be settled in $ O(n) $ worst-case time. As a corollary, when all the coordinates are integers, the 2D DBSCAN problem can be solved in $ O(n \log \log n) $ time deterministically, improving the existing $ O(n \log n) $ bound.", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Martens:2017:BCS, author = "Wim Martens and Frank Neven and Matthias Niewerth and Thomas Schwentick", title = "{BonXai}: Combining the Simplicity of {DTD} with the Expressiveness of {XML} Schema", journal = j-TODS, volume = "42", number = "3", pages = "15:1--15:??", month = aug, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3105960", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Aug 24 16:30:15 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "While the migration from DTD to XML Schema was driven by a need for increased expressivity and flexibility, the latter was also significantly more complex to use and understand. Whereas DTDs are characterized by their simplicity, XML Schema Documents are notoriously difficult. In this article, we introduce the XML specification language BonXai, which incorporates many features of XML Schema but is arguably almost as easy to use as DTDs. In brief, the latter is achieved by sacrificing the explicit use of types in favor of simple patterns expressing contexts for elements. The goal of BonXai is not to replace XML Schema but rather to provide a simpler alternative for users who want to go beyond the expressiveness and features of DTD but do not need the explicit use of types. Furthermore, XML Schema processing tools can be used as a back-end for BonXai, since BonXai can be automatically converted into XML Schema. A particularly strong point of BonXai is its solid foundation rooted in a decade of theoretical work around pattern-based schemas. We present a formal model for a core fragment of BonXai and the translation algorithms to and from a core fragment of XML Schema. 
We prove that BonXai and XML Schema can be converted back-and-forth on the level of tree languages and we formally study the size trade-offs between the two languages.", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Zheng:2017:ESB, author = "Weiguo Zheng and Lei Zou and Lei Chen and Dongyan Zhao", title = "Efficient {SimRank}-Based Similarity Join", journal = j-TODS, volume = "42", number = "3", pages = "16:1--16:??", month = aug, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3083899", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Aug 24 16:30:15 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Graphs have been widely used to model complex data in many real-world applications. Answering vertex join queries over large graphs is meaningful and interesting, which can benefit friend recommendation in social networks and link prediction, and so on. In this article, we adopt ``SimRank'' [13] to evaluate the similarity between two vertices in a large graph because of its generality. Note that ``SimRank'' is purely structure dependent, and it does not rely on the domain knowledge. Specifically, we define a SimRank-based join (SRJ) query to find all vertex pairs satisfying the threshold from two sets of vertices $U$ and $V$. To reduce the search space, we propose a shortest-path-distance-based upper bound for SimRank scores to prune unpromising vertex pairs. In the verification, we propose a novel index, called h-go cover$^+$, to efficiently compute the SimRank score of any single vertex pair. 
Given a graph $G$, we only materialize the SimRank scores of a small proportion of vertex pairs (i.e., the h-go cover$^+$ vertex pairs), based on which the SimRank score of any vertex pair can be computed easily. To find the h-go cover$^+$ vertex pairs, we propose an efficient method without building the vertex-pair graph. Hence, large graphs can be dealt with easily. Extensive experiments over both real and synthetic datasets confirm the efficiency of our solution.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kaminski:2017:QNA, author = "Mark Kaminski and Egor V. Kostylev and Bernardo Cuenca Grau", title = "Query Nesting, Assignment, and Aggregation in {SPARQL 1.1}", journal = j-TODS, volume = "42", number = "3", pages = "17:1--17:??", month = aug, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3083898", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Aug 24 16:30:15 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Answering aggregate queries is a key requirement of emerging applications of Semantic Technologies, such as data warehousing, business intelligence, and sensor networks. To fulfil the requirements of such applications, the standardization of SPARQL 1.1 led to the introduction of a wide range of constructs that enable value computation, aggregation, and query nesting. 
In this article, we provide an in-depth formal analysis of the semantics and expressive power of these new constructs as defined in the SPARQL 1.1 specification, and hence lay the necessary foundations for the development of robust, scalable, and extensible query engines supporting complex numerical and analytics tasks.", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Tschirschnitz:2017:DID, author = "Fabian Tschirschnitz and Thorsten Papenbrock and Felix Naumann", title = "Detecting Inclusion Dependencies on Very Many Tables", journal = j-TODS, volume = "42", number = "3", pages = "18:1--18:??", month = aug, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3105959", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Aug 24 16:30:15 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Detecting inclusion dependencies, the prerequisite of foreign keys, in relational data is a challenging task. Detecting them among the hundreds of thousands or even millions of tables on the web is daunting. Still, such inclusion dependencies can help connect disparate pieces of information on the Web and reveal unknown relationships among tables. With the algorithm Many, we present a novel inclusion dependency detection algorithm, specialized for the very many --- but typically small --- tables found on the Web. We make use of Bloom filters and indexed bit-vectors to show the feasibility of our approach. Our evaluation on two corpora of Web tables shows a superior runtime over known approaches and its usefulness to reveal hidden structures on the Web.", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Schubert:2017:DRR, author = "Erich Schubert and J{\"o}rg Sander and Martin Ester and Hans Peter Kriegel and Xiaowei Xu", title = "{DBSCAN} Revisited, Revisited: Why and How You Should (Still) Use {DBSCAN}", journal = j-TODS, volume = "42", number = "3", pages = "19:1--19:??", month = aug, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3068335", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Aug 24 16:30:15 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tods/; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "At SIGMOD 2015, an article was presented with the title ``DBSCAN Revisited: Mis-Claim, Un-Fixability, and Approximation'' that won the conference's best paper award. In this technical correspondence, we want to point out some inaccuracies in the way DBSCAN was represented, and why the criticism should have been directed at the assumption about the performance of spatial index structures such as R-trees and not at an algorithm that can use such indexes. We will also discuss the relationship of DBSCAN performance and the indexability of the dataset, and discuss some heuristics for choosing appropriate DBSCAN parameters. Some indicators of bad parameters will be proposed to help guide future users of this algorithm in choosing parameters such as to obtain both meaningful results and good performance. In new experiments, we show that the new SIGMOD 2015 methods do not appear to offer practical benefits if the DBSCAN parameters are well chosen and thus they are primarily of theoretical interest. In conclusion, the original DBSCAN algorithm with effective indexes and reasonably chosen parameter values performs competitively compared to the method proposed by Gan and Tao.", acknowledgement = ack-nhfb, articleno = "19", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Aberger:2017:ERE, author = "Christopher R. Aberger and Andrew Lamb and Susan Tu and Andres N{\"o}tzli and Kunle Olukotun and Christopher R{\'e}", title = "{EmptyHeaded}: a Relational Engine for Graph Processing", journal = j-TODS, volume = "42", number = "4", pages = "20:1--20:??", month = nov, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3129246", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Nov 18 10:18:22 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "There are two types of high-performance graph processing engines: low- and high-level engines. Low-level engines (Galois, PowerGraph, Snap) provide optimized data structures and computation models but require users to write low-level imperative code, hence ensuring that efficiency is the burden of the user. In high-level engines, users write in query languages like datalog (SociaLite) or SQL (Grail). High-level engines are easier to use but are orders of magnitude slower than the low-level graph engines. We present EmptyHeaded, a high-level engine that supports a rich datalog-like query language and achieves performance comparable to that of low-level engines. At the core of EmptyHeaded's design is a new class of join algorithms that satisfy strong theoretical guarantees, but have thus far not achieved performance comparable to that of specialized graph processing engines. To achieve high performance, EmptyHeaded introduces a new join engine architecture, including a novel query optimizer and execution engine that leverage single-instruction multiple data (SIMD) parallelism. 
With this architecture, EmptyHeaded outperforms high-level approaches by up to three orders of magnitude on graph pattern queries, PageRank, and Single-Source Shortest Paths (SSSP) and is an order of magnitude faster than many low-level baselines. We validate that EmptyHeaded competes with the best-of-breed low-level engine (Galois), achieving comparable performance on PageRank and at most 3$ \times $ worse performance on SSSP. Finally, we show that the EmptyHeaded design can easily be extended to accommodate a standard resource description framework (RDF) workload, the LUBM benchmark. On the LUBM benchmark, we show that EmptyHeaded can compete with and sometimes outperform two high-level, but specialized RDF baselines (TripleBit and RDF-3X), while outperforming MonetDB by up to three orders of magnitude and LogicBlox by up to two orders of magnitude.", acknowledgement = ack-nhfb, articleno = "20", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Arenas:2017:DQL, author = "Marcelo Arenas and Martin Ugarte", title = "Designing a Query Language for {RDF}: Marrying Open and Closed Worlds", journal = j-TODS, volume = "42", number = "4", pages = "21:1--21:??", month = nov, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3129247", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Nov 18 10:18:22 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "When querying an Resource Description Framework (RDF) graph, a prominent feature is the possibility of extending the answer to a query with optional information. However, the definition of this feature in SPARQL-the standard RDF query language-has raised some important issues. 
Most notably, the use of this feature increases the complexity of the evaluation problem, and its closed-world semantics is in conflict with the underlying open-world semantics of RDF. Many approaches for fixing such problems have been proposed, the most prominent being the introduction of the semantic notion of weakly monotone SPARQL query. Weakly monotone SPARQL queries have shaped the class of queries that conform to the open-world semantics of RDF. Unfortunately, finding an effective way of restricting SPARQL to the fragment of weakly monotone queries has proven to be an elusive problem. In practice, the most widely adopted fragment for writing SPARQL queries is based on the syntactic notion of well designedness. This notion has proven to be a good approach for writing SPARQL queries, but its expressive power has yet to be fully understood. The starting point of this article is to understand the relation between well-designed queries and the semantic notion of weak monotonicity. It is known that every well-designed SPARQL query is weakly monotone; as our first contribution we prove that the converse does not hold, even if an extension of this notion based on the use of disjunction is considered. Given this negative result, we embark on the task of defining syntactic fragments that are weakly monotone and have higher expressive power than the fragment of well-designed queries. To this end, we move to a more general scenario where infinite RDF graphs are also allowed, so interpolation techniques studied for first-order logic can be applied. With the use of these techniques, we are able to define a new operator for SPARQL that gives rise to a query language with the desired properties (over finite and infinite RDF graphs). It should be noticed that every query in this fragment is weakly monotone if we restrict the semantics to finite RDF graphs. 
Moreover, we use this result to provide a simple characterization of the class of monotone CONSTRUCT queries, that is, the class of SPARQL queries that produce RDF graphs as output. Finally, we pinpoint the complexity of the evaluation problem for the query languages identified in the article.", acknowledgement = ack-nhfb, articleno = "21", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Barany:2017:DPP, author = "Vince B{\'a}r{\'a}ny and Balder {Ten Cate} and Benny Kimelfeld and Dan Olteanu and Zografoula Vagena", title = "Declarative Probabilistic Programming with {Datalog}", journal = j-TODS, volume = "42", number = "4", pages = "22:1--22:??", month = nov, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3132700", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Nov 18 10:18:22 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Probabilistic programming languages are used for developing statistical models. They typically consist of two components: a specification of a stochastic process (the prior) and a specification of observations that restrict the probability space to a conditional subspace (the posterior). Use cases of such formalisms include the development of algorithms in machine learning and artificial intelligence. In this article, we establish a probabilistic-programming extension of Datalog that, on the one hand, allows for defining a rich family of statistical models, and on the other hand retains the fundamental properties of declarativity. Our proposed extension provides mechanisms to include common numerical probability functions; in particular, conclusions of rules may contain values drawn from such functions. The semantics of a program is a probability distribution over the possible outcomes of the input database with respect to the program. 
Observations are naturally incorporated by means of integrity constraints over the extensional and intensional relations. The resulting semantics is robust under different chases and invariant to rewritings that preserve logical equivalence.", acknowledgement = ack-nhfb, articleno = "22", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Alvaro:2017:BCA, author = "Peter Alvaro and Neil Conway and Joseph M. Hellerstein and David Maier", title = "{Blazes}: Coordination Analysis and Placement for Distributed Programs", journal = j-TODS, volume = "42", number = "4", pages = "23:1--23:??", month = nov, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3110214", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Nov 18 10:18:22 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Distributed consistency is perhaps the most-discussed topic in distributed systems today. Coordination protocols can ensure consistency, but in practice they cause undesirable performance unless used judiciously. Scalable distributed architectures avoid coordination whenever possible, but under-coordinated systems can exhibit behavioral anomalies under fault, which are often extremely difficult to debug. This raises significant challenges for distributed system architects and developers. In this article, we present Blazes, a cross-platform program analysis framework that (a) identifies program locations that require coordination to ensure consistent executions, and (b) automatically synthesizes application-specific coordination code that can significantly outperform general-purpose techniques. We present two case studies, one using annotated programs in the Twitter Storm system and another using the Bloom declarative language.", acknowledgement = ack-nhfb, articleno = "23", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Colazzo:2017:LTM, author = "Dario Colazzo and Giorgio Ghelli and Carlo Sartiani", title = "Linear Time Membership in a Class of Regular Expressions with Counting, Interleaving, and Unordered Concatenation", journal = j-TODS, volume = "42", number = "4", pages = "24:1--24:??", month = nov, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3132701", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Nov 18 10:18:22 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Regular Expressions (REs) are ubiquitous in database and programming languages. While many applications make use of REs extended with interleaving (shuffle) and unordered concatenation operators, this extension badly affects the complexity of basic operations, and, especially, makes membership NP-hard, which is unacceptable in most practical scenarios. In this article, we study the problem of membership checking for a restricted class of these extended REs, called conflict-free REs, which are expressive enough to cover the vast majority of real-world applications. We present several polynomial algorithms for membership checking over conflict-free REs. The algorithms are all polynomial and differ in terms of adopted optimization techniques and in the kind of supported operators. As a particular application, we generalize the approach to check membership of Extensible Markup Language trees into a class of EDTDs (Extended Document Type Definitions) that models the crucial aspects of DTDs (Document Type Definitions) and XSD (XML Schema Definitions) schemas. 
Results about an extensive experimental analysis validate the efficiency of the presented membership checking techniques.", acknowledgement = ack-nhfb, articleno = "24", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Zhang:2017:PPD, author = "Jun Zhang and Graham Cormode and Cecilia M. Procopiuc and Divesh Srivastava and Xiaokui Xiao", title = "{PrivBayes}: Private Data Release via {Bayesian} Networks", journal = j-TODS, volume = "42", number = "4", pages = "25:1--25:??", month = nov, year = "2017", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3134428", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Nov 18 10:18:22 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Privacy-preserving data publishing is an important problem that has been the focus of extensive study. The state-of-the-art solution for this problem is differential privacy, which offers a strong degree of privacy protection without making restrictive assumptions about the adversary. Existing techniques using differential privacy, however, cannot effectively handle the publication of high-dimensional data. In particular, when the input dataset contains a large number of attributes, existing methods require injecting a prohibitive amount of noise compared to the signal in the data, which renders the published data next to useless. To address the deficiency of the existing methods, this paper presents PrivBayes, a differentially private method for releasing high-dimensional data. Given a dataset D, PrivBayes first constructs a Bayesian network N, which (i) provides a succinct model of the correlations among the attributes in D and (ii) allows us to approximate the distribution of data in D using a set P of low-dimensional marginals of D. 
After that, PrivBayes injects noise into each marginal in P to ensure differential privacy and then uses the noisy marginals and the Bayesian network to construct an approximation of the data distribution in D. Finally, PrivBayes samples tuples from the approximate distribution to construct a synthetic dataset, and then releases the synthetic data. Intuitively, PrivBayes circumvents the curse of dimensionality, as it injects noise into the low-dimensional marginals in P instead of the high-dimensional dataset D. Private construction of Bayesian networks turns out to be significantly challenging, and we introduce a novel approach that uses a surrogate function for mutual information to build the model more accurately. We experimentally evaluate PrivBayes on real data and demonstrate that it significantly outperforms existing solutions in terms of accuracy.", acknowledgement = ack-nhfb, articleno = "25", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jensen:2018:EUE, author = "Christian S. Jensen", title = "Editorial: Updates to the {Editorial Board}", journal = j-TODS, volume = "43", number = "1", pages = "1:1--1:??", month = apr, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3183376", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 11 18:02:25 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Demertzis:2018:PPR, author = "Ioannis Demertzis and Stavros Papadopoulos and Odysseas Papapetrou and Antonios Deligiannakis and Minos Garofalakis and Charalampos Papamanthou", title = "Practical Private Range Search in Depth", journal = j-TODS, volume = "43", number = "1", pages = "2:1--2:??", month = apr, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3167971", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 11 18:02:25 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We consider a data owner that outsources its dataset to an untrusted server. The owner wishes to enable the server to answer range queries on a single attribute, without compromising the privacy of the data and the queries. There are several schemes on ``practical'' private range search (mainly in database venues) that attempt to strike a trade-off between efficiency and security. Nevertheless, these methods either lack provable security guarantees or permit unacceptable privacy leakages. In this article, we take an interdisciplinary approach, which combines the rigor of security formulations and proofs with efficient data management techniques. We construct a wide set of novel schemes with realistic security/performance trade-offs, adopting the notion of Searchable Symmetric Encryption (SSE), primarily proposed for keyword search. We reduce range search to multi-keyword search using range-covering techniques with tree-like indexes, and formalize the problem as Range Searchable Symmetric Encryption (RSSE). We demonstrate that, given any secure SSE scheme, the challenge boils down to (i) formulating leakages that arise from the index structure and (ii) minimizing false positives incurred by some schemes under heavy data skew. 
We also explain an important concept in the recent SSE bibliography, namely locality, and design generic and specialized ways to attribute locality to our RSSE schemes. Moreover, we are the first to devise secure schemes for answering range aggregate queries, such as range sums and range min/max. We analytically detail the superiority of our proposals over prior work and experimentally confirm their practicality.", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Chung:2018:EIU, author = "Yeounoh Chung and Michael Lind Mortensen and Carsten Binnig and Tim Kraska", title = "Estimating the Impact of Unknown Unknowns on Aggregate Query Results", journal = j-TODS, volume = "43", number = "1", pages = "3:1--3:??", month = apr, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3167970", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 11 18:02:25 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "It is common practice for data scientists to acquire and integrate disparate data sources to achieve higher quality results. But even with a perfectly cleaned and merged data set, two fundamental questions remain: (1) Is the integrated data set complete? and (2) What is the impact of any unknown (i.e., unobserved) data on query results? In this work, we develop and analyze techniques to estimate the impact of the unknown data (a.k.a., unknown unknowns ) on simple aggregate queries. The key idea is that the overlap between different data sources enables us to estimate the number and values of the missing data items. Our main techniques are parameter-free and do not assume prior knowledge about the distribution; we also propose a parametric model that can be used instead when the data sources are imbalanced. 
Through a series of experiments, we show that estimating the impact of unknown unknowns is invaluable to better assess the results of aggregate queries over integrated data sources.", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Shaikhha:2018:BEQ, author = "Amir Shaikhha and Yannis Klonatos and Christoph Koch", title = "Building Efficient Query Engines in a High-Level Language", journal = j-TODS, volume = "43", number = "1", pages = "4:1--4:??", month = apr, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3183653", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 11 18:02:25 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Abstraction without regret refers to the vision of using high-level programming languages for systems development without experiencing a negative impact on performance. A database system designed according to this vision offers both increased productivity and high performance instead of sacrificing the former for the latter as is the case with existing, monolithic implementations that are hard to maintain and extend. In this article, we realize this vision in the domain of analytical query processing. We present LegoBase, a query engine written in the high-level programming language Scala. The key technique to regain efficiency is to apply generative programming: LegoBase performs source-to-source compilation and optimizes database systems code by converting the high-level Scala code to specialized, low-level C code. We show how generative programming allows to easily implement a wide spectrum of optimizations, such as introducing data partitioning or switching from a row to a column data layout, which are difficult to achieve with existing low-level query compilers that handle only queries. 
We demonstrate that sufficiently powerful abstractions are essential for dealing with the complexity of the optimization effort, shielding developers from compiler internals and decoupling individual optimizations from each other. We evaluate our approach with the TPC-H benchmark and show that (a) with all optimizations enabled, our architecture significantly outperforms a commercial in-memory database as well as an existing query compiler. (b) Programmers need to provide just a few hundred lines of high-level code for implementing the optimizations, instead of complicated low-level code that is required by existing query compilation approaches. (c) These optimizations may potentially come at the cost of using more system memory for improved performance. (d) The compilation overhead is low compared to the overall execution time, thus making our approach usable in practice for compiling query engines.", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Libkin:2018:TNA, author = "Leonid Libkin and Juan L. Reutter and Adri{\'a}n Soto and Domagoj Vrgoc", title = "{TriAL}: a Navigational Algebra for {RDF} Triplestores", journal = j-TODS, volume = "43", number = "1", pages = "5:1--5:??", month = apr, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3154385", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 11 18:02:25 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Navigational queries over RDF data are viewed as one of the main applications of graph query languages, and yet the standard model of graph databases --- essentially labeled graphs --- is different from the triples-based model of RDF. 
While encodings of RDF databases into graph data exist, we show that even the most natural ones are bound to lose some functionality when used in conjunction with graph query languages. The solution is to work directly with triples, but then many properties taken for granted in the graph database context (e.g., reachability) lose their natural meaning. Our goal is to introduce languages that work directly over triples and are closed, i.e., they produce sets of triples, rather than graphs. Our basic language is called TriAL, or Triple Algebra: it guarantees closure properties by replacing the product with a family of join operations. We extend TriAL with recursion and explain why such an extension is more intricate for triples than for graphs. We present a declarative language, namely a fragment of datalog, capturing the recursive algebra. For both languages, the combined complexity of query evaluation is given by low-degree polynomials. We compare our language with previously studied graph query languages such as adaptations of XPath, regular path queries, and nested regular expressions; many of these languages are subsumed by the recursive triple algebra. We also provide an implementation of recursive TriAL on top of a relational query engine, and we show its usefulness by running a wide array of navigational queries over real-world RDF data, while at the same time testing how our implementation compares to existing RDF systems.", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Cao:2018:BQR, author = "Yang Cao and Wenfei Fan and Floris Geerts and Ping Lu", title = "Bounded Query Rewriting Using Views", journal = j-TODS, volume = "43", number = "1", pages = "6:1--6:??", month = apr, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3183673", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Apr 11 18:02:25 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A query $Q$ in a language $L$ has a bounded rewriting using a set of $L$-definable views if there exists a query $ Q^'$ in $L$ such that given any dataset $D$, $ Q(D)$ can be computed by $ Q^'$ that accesses only cached views and a small fraction $ D_Q$ of $D$. We consider datasets $D$ that satisfy a set of access constraints, which are a combination of simple cardinality constraints and associated indices, such that the size $ |D_Q|$ of $ D_Q$ and the time to identify $ D_Q$ are independent of $ |D|$, no matter how big $D$ is. In this article, we study the problem for deciding whether a query has a bounded rewriting given a set $V$ of views and a set $A$ of access constraints. We establish the complexity of the problem for various query languages $L$, from $ \Sigma_3^p$-complete for conjunctive queries (CQ) to undecidable for relational algebra (FO). We show that the intractability for CQ is rather robust even for acyclic CQ with fixed $V$ and $A$, and characterize when the problem is in PTIME. To make practical use of bounded rewriting, we provide an effective syntax for FO queries that have a bounded rewriting. The syntax characterizes a key subclass of such queries without sacrificing the expressive power, and can be checked in PTIME. 
Finally, we investigate $ L_1$-to-$ L_2$ bounded rewriting, when Q in $ L_1$ is allowed to be rewritten into a query $ Q^'$ in another language $ L_2$. We show that this relaxation does not simplify the analysis of bounded query rewriting using views.", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Berkholz:2018:AFQ, author = "Christoph Berkholz and Jens Keppeler and Nicole Schweikardt", title = "Answering {FO+MOD} Queries under Updates on Bounded Degree Databases", journal = j-TODS, volume = "43", number = "2", pages = "7:1--7:??", month = sep, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3232056", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:12 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We investigate the query evaluation problem for fixed queries over fully dynamic databases, where tuples can be inserted or deleted. The task is to design a dynamic algorithm that immediately reports the new result of a fixed query after every database update. We consider queries in first-order logic (FO) and its extension with modulo-counting quantifiers (FO+MOD) and show that they can be efficiently evaluated under updates, provided that the dynamic database does not exceed a certain degree bound. In particular, we construct a data structure that allows us to answer a Boolean FO+MOD query and to compute the size of the result of a non-Boolean query within constant time after every database update. 
Furthermore, after every database update, we can update the data structure in constant time such that afterwards we are able to test within constant time for a given tuple whether or not it belongs to the query result, to enumerate all tuples in the new query result, and to enumerate the difference between the old and the new query result with constant delay between the output tuples. The preprocessing time needed to build the data structure is linear in the size of the database. Our results extend earlier work on the evaluation of first-order queries on static databases of bounded degree and rely on an effective Hanf normal form for FO+MOD recently obtained by Heimberg, Kuske, and Schweikardt (LICS 2016).", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Barcelo:2018:EES, author = "Pablo Barcel{\'o} and Markus Kr{\"o}ll and Reinhard Pichler and Sebastian Skritek", title = "Efficient Evaluation and Static Analysis for Well-Designed Pattern Trees with Projection", journal = j-TODS, volume = "43", number = "2", pages = "8:1--8:??", month = sep, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3233983", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:12 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Conjunctive queries (CQs) fail to provide an answer when the pattern described by the query does not exactly match the data. CQs might thus be too restrictive as a querying mechanism when data is semistructured or incomplete. The semantic web therefore provides a formalism-known as (projected) well-designed pattern trees (pWDPTs)-that tackles this problem: pWDPTs allow us to formulate queries that match parts of the query over the data if available, but do not ignore answers of the remaining query otherwise. 
Here we abstract away the specifics of semantic web applications and study pWDPTs over arbitrary relational schemas. Since the language of pWDPTs subsumes CQs, their evaluation problem is intractable. We identify structural properties of pWDPTs that lead to (fixed-parameter) tractability of various variants of the evaluation problem. We also show that checking if a pWDPT is equivalent to one in our tractable class is in 2EXPTIME. As a corollary, we obtain fixed-parameter tractability of evaluation for pWDPTs with such good behavior. Our techniques also allow us to develop a theory of approximations for pWDPTs.", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Lazerson:2018:LMD, author = "Arnon Lazerson and Daniel Keren and Assaf Schuster", title = "Lightweight Monitoring of Distributed Streams", journal = j-TODS, volume = "43", number = "2", pages = "9:1--9:??", month = sep, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3226113", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:12 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "As data becomes dynamic, large, and distributed, there is increasing demand for what have become known as distributed stream algorithms. Since continuously collecting the data to a central server and processing it there is infeasible, a common approach is to define local conditions at the distributed nodes, such that-as long as they are maintained-some desirable global condition holds. Previous methods derived local conditions focusing on communication efficiency. While proving very useful for reducing the communication volume, these local conditions often suffer from heavy computational burden at the nodes. 
The computational complexity of the local conditions affects both the runtime and the energy consumption. These are especially critical for resource-limited devices like smartphones and sensor nodes. Such devices are becoming more ubiquitous due to the recent trend toward smart cities and the Internet of Things. To accommodate for high data rates and limited resources of these devices, it is crucial that the local conditions be quickly and efficiently evaluated. Here we propose a novel approach, designated CB (for Convex/Concave Bounds). CB defines local conditions using suitably chosen convex and concave functions. Lightweight and simple, these local conditions can be rapidly checked on the fly. CB's superiority over the state-of-the-art is demonstrated in its reduced runtime and power consumption, by up to six orders of magnitude in some cases. As an added bonus, CB also reduced communication overhead in all the tested application scenarios.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Qi:2018:RQU, author = "Jianzhong Qi and Fei Zuo and Hanan Samet and Jia Cheng Yao", title = "{$K$}-Regret Queries Using Multiplicative Utility Functions", journal = j-TODS, volume = "43", number = "2", pages = "10:1--10:??", month = sep, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3230634", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:12 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The k-regret query aims to return a size-k subset S of a database D such that, for any query user that selects a data object from this size-k subset S rather than from database D, her regret ratio is minimized. 
The regret ratio here is modeled by the relative difference in the optimality between the locally optimal object in S and the globally optimal object in D. The optimality of a data object in turn is modeled by a utility function of the query user. Unlike traditional top-k queries, the k-regret query does not minimize the regret ratio for a specific utility function. Instead, it considers a family of infinite utility functions F, and aims to find a size-k subset that minimizes the maximum regret ratio of any utility function in F. Studies on k-regret queries have focused on the family of additive utility functions, which have limitations in modeling individuals' preferences and decision-making processes, especially for a common observation called the diminishing marginal rate of substitution (DMRS). We introduce k-regret queries with multiplicative utility functions, which are more expressive in modeling the DMRS, to overcome those limitations. We propose a query algorithm with bounded regret ratios. To showcase the applicability of the algorithm, we apply it to a special family of multiplicative utility functions, the Cobb--Douglas family of utility functions, and a closely related family of utility functions, the Constant Elasticity of Substitution family of utility functions, both of which are frequently used utility functions in microeconomics. After a further study of the query properties, we propose a heuristic algorithm that produces even smaller regret ratios in practice. Extensive experiments on the proposed algorithms confirm that they consistently achieve small maximum regret ratios.", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kimelfeld:2018:RFC, author = "Benny Kimelfeld and Christopher R{\'e}", title = "A Relational Framework for Classifier Engineering", journal = j-TODS, volume = "43", number = "3", pages = "11:1--11:??", month = nov, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3268931", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:13 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In the design of analytical procedures and machine learning solutions, a critical and time-consuming task is that of feature engineering, for which various recipes and tooling approaches have been developed. In this article, we embark on the establishment of database foundations for feature engineering. We propose a formal framework for classification in the context of a relational database. The goal of this framework is to open the way to research and techniques to assist developers with the task of feature engineering by utilizing the database's modeling and understanding of data and queries and by deploying the well-studied principles of database management. As a first step, we demonstrate the usefulness of this framework by formally defining three key algorithmic challenges. The first challenge is that of separability, which is the problem of determining the existence of feature queries that agree with the training examples. The second is that of evaluating the VC dimension of the model class with respect to a given sequence of feature queries. The third challenge is identifiability, which is the task of testing for a property of independence among features that are represented as database queries. 
We give preliminary results on these challenges for the case where features are defined by means of conjunctive queries, and, in particular, we study the implication of various traditional syntactic restrictions on the inherent computational complexity.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Schwentick:2018:DCU, author = "Thomas Schwentick and Nils Vortmeier and Thomas Zeume", title = "Dynamic Complexity under Definable Changes", journal = j-TODS, volume = "43", number = "3", pages = "12:1--12:??", month = nov, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3241040", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:13 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In the setting of dynamic complexity, the goal of a dynamic program is to maintain the result of a fixed query for an input database that is subject to changes, possibly using additional auxiliary relations. In other words, a dynamic program updates a materialized view whenever a base relation is changed. The update of query result and auxiliary relations is specified using first-order logic or, equivalently, relational algebra. The original framework by Patnaik and Immerman only considers changes to the database that insert or delete single tuples. This article extends the setting to definable changes, also specified by first-order queries on the database, and generalizes previous maintenance results to these more expressive change operations. 
More specifically, it is shown that the undirected reachability query is first-order maintainable under single-tuple changes and first-order defined insertions, likewise the directed reachability query for directed acyclic graphs is first-order maintainable under insertions defined by quantifier-free first-order queries. These results rely on bounded bridge properties, which basically say that, after an insertion of a defined set of edges, for each connected pair of nodes there is some path with a bounded number of new edges. While this bound can be huge, in general, it is shown to be small for insertion queries defined by unions of conjunctive queries. To illustrate that the results for this restricted setting could be practically relevant, they are complemented by an experimental study that compares the performance of dynamic programs with complex changes, dynamic programs with single changes, and with recomputation from scratch. The positive results are complemented by several inexpressibility results. For example, it is shown that-unlike for single-tuple insertions-dynamic programs that maintain the reachability query under definable, quantifier-free changes strictly need update formulas with quantifiers. Finally, further positive results unrelated to reachability are presented: it is shown that for changes definable by parameter-free first-order formulas, all LOGSPACE-definable (and even AC$^1$-definable) queries can be maintained by first-order dynamic programs.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Arenas:2018:ELQ, author = "Marcelo Arenas and Georg Gottlob and Andreas Pieris", title = "Expressive Languages for Querying the {Semantic Web}", journal = j-TODS, volume = "43", number = "3", pages = "13:1--13:??", month = nov, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3238304", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:13 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The problem of querying RDF data is a central issue for the development of the Semantic Web. The query language SPARQL has become the standard language for querying RDF since its W3C standardization in 2008. However, the 2008 version of this language missed some important functionalities: reasoning capabilities to deal with RDFS and OWL vocabularies, navigational capabilities to exploit the graph structure of RDF data, and a general form of recursion much needed to express some natural queries. To overcome these limitations, a new version of SPARQL, called SPARQL 1.1, was released in 2013, which includes entailment regimes for RDFS and OWL vocabularies, and a mechanism to express navigation patterns through regular expressions. Unfortunately, there are a number of useful navigation patterns that cannot be expressed in SPARQL 1.1, and the language lacks a general mechanism to express recursive queries. To the best of our knowledge, no efficient RDF query language that combines the above functionalities is known. It is the aim of this work to fill this gap. 
To this end, we focus on a core fragment of the OWL 2 QL profile of OWL 2 and show that every SPARQL query enriched with the above features can be naturally translated into a query expressed in a language that is based on an extension of Datalog, which allows for value invention and stratified negation. However, the query evaluation problem for this language is highly intractable, which is not surprising since it is expressive enough to encode some inherently hard queries. We identify a natural fragment of it, and we show it to be tractable and powerful enough to define SPARQL queries enhanced with the desired functionalities.", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Polychroniou:2018:DJD, author = "Orestis Polychroniou and Wangda Zhang and Kenneth A. Ross", title = "Distributed Joins and Data Placement for Minimal Network Traffic", journal = j-TODS, volume = "43", number = "3", pages = "14:1--14:??", month = nov, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3241039", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:13 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Network communication is the slowest component of many operators in distributed parallel databases deployed for large-scale analytics. Whereas considerable work has focused on speeding up databases on modern hardware, communication reduction has received less attention. Existing parallel DBMSs rely on algorithms designed for disks with minor modifications for networks. A more complicated algorithm may burden the CPUs but could avoid redundant transfers of tuples across the network. 
We introduce track join, a new distributed join algorithm that minimizes network traffic by generating an optimal transfer schedule for each distinct join key. Track join extends the trade-off options between CPU and network. Track join explicitly detects and exploits locality, also allowing for advanced placement of tuples beyond hash partitioning on a single attribute. We propose a novel data placement algorithm based on track join that minimizes the total network cost of multiple joins across different dimensions in an analytical workload. Our evaluation shows that track join outperforms hash join on the most expensive queries of real workloads regarding both network traffic and execution time. Finally, we show that our data placement optimization approach is both robust and effective in minimizing the total network cost of joins in analytical workloads.", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Abuzaid:2018:MPA, author = "Firas Abuzaid and Peter Bailis and Jialin Ding and Edward Gan and Samuel Madden and Deepak Narayanan and Kexin Rong and Sahaana Suri", title = "{MacroBase}: Prioritizing Attention in Fast Data", journal = j-TODS, volume = "43", number = "4", pages = "15:1--15:??", month = dec, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3276463", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:16 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "As data volumes continue to rise, manual inspection is becoming increasingly untenable. In response, we present MacroBase, a data analytics engine that prioritizes end-user attention in high-volume fast data streams. 
MacroBase enables efficient, accurate, and modular analyses that highlight and aggregate important and unusual behavior, acting as a search engine for fast data. MacroBase is able to deliver order-of-magnitude speedups over alternatives by optimizing the combination of explanation (i.e., feature selection) and classification tasks and by leveraging a new reservoir sampler and heavy-hitters sketch specialized for fast data streams. As a result, MacroBase delivers accurate results at speeds of up to 2M events per second per query on a single core. The system has delivered meaningful results in production, including at a telematics company monitoring hundreds of thousands of vehicles.", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Dayan:2018:OBF, author = "Niv Dayan and Manos Athanassoulis and Stratos Idreos", title = "Optimal {Bloom} Filters and Adaptive Merging for {LSM}-Trees", journal = j-TODS, volume = "43", number = "4", pages = "16:1--16:??", month = dec, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3276980", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:16 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In this article, we show that key-value stores backed by a log-structured merge-tree (LSM-tree) exhibit an intrinsic tradeoff between lookup cost, update cost, and main memory footprint, yet all existing designs expose a suboptimal and difficult to tune tradeoff among these metrics. We pinpoint the problem to the fact that modern key-value stores suboptimally co-tune the merge policy, the buffer size, and the Bloom filters' false-positive rates across the LSM-tree's different levels. 
We present Monkey, an LSM-tree based key-value store that strikes the optimal balance between the costs of updates and lookups with any given main memory budget. The core insight is that worst-case lookup cost is proportional to the sum of the false-positive rates of the Bloom filters across all levels of the LSM-tree. Contrary to state-of-the-art key-value stores that assign a fixed number of bits-per-element to all Bloom filters, Monkey allocates memory to filters across different levels so as to minimize the sum of their false-positive rates. We show analytically that Monkey reduces the asymptotic complexity of the worst-case lookup I/O cost, and we verify empirically using an implementation on top of RocksDB that Monkey reduces lookup latency by an increasing margin as the data volume grows (50--80\% for the data sizes we experimented with). Furthermore, we map the design space onto a closed-form model that enables adapting the merging frequency and memory allocation to strike the best tradeoff among lookup cost, update cost and main memory, depending on the workload (proportion of lookups and updates), the dataset (number and size of entries), and the underlying hardware (main memory available, disk vs. flash). We show how to use this model to answer what-if design questions about how changes in environmental parameters impact performance and how to adapt the design of the key-value store for optimal performance.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Meneghetti:2018:LQA, author = "Niccol{\`o} Meneghetti and Oliver Kennedy and Wolfgang Gatterbauer", title = "Learning From Query-Answers: a Scalable Approach to Belief Updating and Parameter Learning", journal = j-TODS, volume = "43", number = "4", pages = "17:1--17:??", month = dec, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3277503", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:16 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Tuple-independent and disjoint-independent probabilistic databases (TI- and DI-PDBs) represent uncertain data in a factorized form as a product of independent random variables that represent either tuples (TI-PDBs) or sets of tuples (DI-PDBs). When the user submits a query, the database derives the marginal probabilities of each output-tuple, exploiting the underlying assumptions of statistical independence. While query processing in TI- and DI-PDBs has been studied extensively, limited research has been dedicated to the problems of updating or deriving the parameters from observations of query results. Addressing this problem is the main focus of this article. We first introduce Beta Probabilistic Databases (B-PDBs), a generalization of TI-PDBs designed to support both (i) belief updating and (ii) parameter learning in a principled and scalable way. The key idea of B-PDBs is to treat each parameter as a latent, Beta-distributed random variable. We show how this simple expedient enables both belief updating and parameter learning in a principled way, without imposing any burden on regular query processing. Building on B-PDBs, we then introduce Dirichlet Probabilistic Databases (D-PDBs), a generalization of DI-PDBs with similar properties. 
We provide the following key contributions for both B- and D-PDBs: (i) We study the complexity of performing Bayesian belief updates and devise efficient algorithms for certain tractable classes of queries; (ii) we propose a soft-EM algorithm for computing maximum-likelihood estimates of the parameters; (iii) we present an algorithm for efficiently computing conditional probabilities, allowing us to efficiently implement B- and D-PDBs via a standard relational engine; and (iv) we support our conclusions with extensive experimental results.", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fan:2018:PSG, author = "Wenfei Fan and Wenyuan Yu and Jingbo Xu and Jingren Zhou and Xiaojian Luo and Qiang Yin and Ping Lu and Yang Cao and Ruiqi Xu", title = "Parallelizing Sequential Graph Computations", journal = j-TODS, volume = "43", number = "4", pages = "18:1--18:??", month = dec, year = "2018", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3282488", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:16 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This article presents GRAPE, a parallel GRAPh Engine for graph computations. GRAPE differs from prior systems in its ability to parallelize existing sequential graph algorithms as a whole, without the need for recasting the entire algorithm into a new model. Underlying GRAPE are a simple programming model and a principled approach based on fixpoint computation that starts with partial evaluation and uses an incremental function as the intermediate consequence operator. We show that users can devise existing sequential graph algorithms with minor additions, and GRAPE parallelizes the computation.
Under a monotonic condition, the GRAPE parallelization guarantees to converge at correct answers as long as the sequential algorithms are correct. Moreover, we show that algorithms in MapReduce, BSP, and PRAM can be optimally simulated on GRAPE. In addition to the ease of programming, we experimentally verify that GRAPE achieves comparable performance to the state-of-the-art graph systems using real-life and synthetic graphs.", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Kipf:2019:SAF, author = "Andreas Kipf and Varun Pandey and Jan B{\"o}ttcher and Lucas Braun and Thomas Neumann and Alfons Kemper", title = "Scalable Analytics on Fast Data", journal = j-TODS, volume = "44", number = "1", pages = "1:1--1:??", month = jan, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3283811", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:16 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Today's streaming applications demand increasingly high event throughput rates and are often subject to strict latency constraints. To allow for more complex workloads, such as window-based aggregations, streaming systems need to support stateful event processing. This introduces new challenges for streaming engines as the state needs to be maintained in a consistent and durable manner and simultaneously accessed by complex queries for real-time analytics. Modern streaming systems, such as Apache Flink, do not allow for efficiently exposing the state to analytical queries. Thus, data engineers are forced to keep the state in external data stores, which significantly increases the latencies until events become visible to analytical queries. Proprietary solutions have been created to meet data freshness constraints. 
These solutions are expensive, error-prone, and difficult to maintain. Main-memory database systems, such as HyPer, achieve extremely low query response times while maintaining high update rates, which makes them well-suited for analytical streaming workloads. In this article, we explore extensions to database systems to match the performance and usability of streaming systems.", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Li:2019:WJX, author = "Feifei Li and Bin Wu and Ke Yi and Zhuoyue Zhao", title = "Wander Join and {XDB}: Online Aggregation via Random Walks", journal = j-TODS, volume = "44", number = "1", pages = "2:1--2:??", month = jan, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3284551", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:16 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Joins are expensive, and online aggregation over joins was proposed to mitigate the cost, which offers users a nice and flexible tradeoff between query efficiency and accuracy in a continuous, online fashion. However, the state-of-the-art approach, in both internal and external memory, is based on ripple join, which is still very expensive and even needs unrealistic assumptions (e.g., tuples in a table are stored in random order). This article proposes a new approach, the wander join algorithm, to the online aggregation problem by performing random walks over the underlying join graph. We also design an optimizer that chooses the optimal plan for conducting the random walks without having to collect any statistics a priori. Compared with ripple join, wander join is particularly efficient for equality joins involving multiple tables, but also supports $ \theta $-joins. 
Selection predicates and group-by clauses can be handled as well. To demonstrate the usefulness of wander join, we have designed and implemented XDB (approXimate DB) by integrating wander join into various systems including PostgreSQL, Spark, and a stand-alone plug-in version using PL/SQL. The design and implementation of XDB has demonstrated wander join's practicality in a full-fledged database system. Extensive experiments using the TPC-H benchmark have demonstrated the superior performance of wander join over ripple join.", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bai:2019:HMD, author = "Ran Bai and Wing Kai Hon and Eric Lo and Zhian He and Kenny Zhu", title = "Historic Moments Discovery in Sequence Data", journal = j-TODS, volume = "44", number = "1", pages = "3:1--3:??", month = jan, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3276975", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:16 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Many emerging applications are based on finding interesting subsequences from sequence data. Finding ``prominent streaks,'' a set of the longest contiguous subsequences with values all above (or below) a certain threshold, from sequence data is one of that kind that receives much attention. Motivated from real applications, we observe that prominent streaks alone are not insightful enough but require the discovery of something we coined as ``historic moments'' as companions. In this article, we present an algorithm to efficiently compute historic moments from sequence data. The algorithm is incremental and space optimal, meaning that when facing new data arrival, it is able to efficiently refresh the results by keeping minimal information. 
Case studies show that historic moments can significantly improve the insights offered by prominent streaks alone. Furthermore, experiments show that our algorithm can outperform the baseline in both time and space.", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Alexandrov:2019:ROE, author = "Alexander Alexandrov and Georgi Krastev and Volker Markl", title = "Representations and Optimizations for Embedded Parallel Dataflow Languages", journal = j-TODS, volume = "44", number = "1", pages = "4:1--4:??", month = jan, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3281629", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 29 17:36:16 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Parallel dataflow engines such as Apache Hadoop, Apache Spark, and Apache Flink are an established alternative to relational databases for modern data analysis applications. A characteristic of these systems is a scalable programming model based on distributed collections and parallel transformations expressed by means of second-order functions such as map and reduce. Notable examples are Flink's DataSet and Spark's RDD programming abstractions. These programming models are realized as EDSLs---domain-specific languages embedded in a general-purpose host language such as Java, Scala, or Python. This approach has several advantages over traditional external DSLs such as SQL or XQuery. First, syntactic constructs from the host language (e.g., anonymous functions syntax, value definitions, and fluent syntax via method chaining) can be reused in the EDSL. This eases the learning curve for developers already familiar with the host language.
Second, it allows for seamless integration of library methods written in the host language via the function parameters passed to the parallel dataflow operators. This reduces the effort for developing analytics dataflows that go beyond pure SQL and require domain-specific logic. At the same time, however, state-of-the-art parallel dataflow EDSLs exhibit a number of shortcomings. First, one of the main advantages of an external DSL such as SQL---the high-level, declarative Select-From-Where syntax---is either lost completely or mimicked in a non-standard way. Second, execution aspects such as caching, join order, and partial aggregation have to be decided by the programmer. Optimizing them automatically is very difficult due to the limited program context available in the intermediate representation of the DSL. In this article, we argue that the limitations listed above are a side effect of the adopted type-based embedding approach. As a solution, we propose an alternative EDSL design based on quotations. We present a DSL embedded in Scala and discuss its compiler pipeline, intermediate representation, and some of the enabled optimizations. We promote the algebraic type of bags in union representation as a model for distributed collections and its associated structural recursion scheme and monad as a model for parallel collection processing. At the source code level, Scala's comprehension syntax over a bag monad can be used to encode Select-From-Where expressions in a standard way. At the intermediate representation level, maintaining comprehensions as a first-class citizen can be used to simplify the design and implementation of holistic dataflow optimizations that accommodate for nesting and control-flow. The proposed DSL design therefore reconciles the benefits of embedded parallel dataflow DSLs with the declarativity and optimization potential of external DSLs like SQL.", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Fan:2019:DG, author = "Wenfei Fan and Ping Lu", title = "Dependencies for Graphs", journal = j-TODS, volume = "44", number = "2", pages = "5:1--5:??", month = apr, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3287285", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 29 10:55:20 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3287285", abstract = "This article proposes a class of dependencies for graphs, referred to as graph entity dependencies (GEDs). A GED is defined as a combination of a graph pattern and an attribute dependency. In a uniform format, GEDs can express graph functional dependencies with constant literals to catch inconsistencies, and keys carrying id literals to identify entities (vertices) in a graph. We revise the chase for GEDs and prove its Church--Rosser property. We characterize GED satisfiability and implication, and establish the complexity of these problems and the validation problem for GEDs, in the presence and absence of constant literals and id literals. We also develop a sound, complete and independent axiom system for finite implication of GEDs. In addition, we extend GEDs with built-in predicates or disjunctions, to strike a balance between the expressive power and complexity. We settle the complexity of the satisfiability, implication, and validation problems for these extensions.", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Hu:2019:OOM, author = "Xiao Hu and Ke Yi and Yufei Tao", title = "Output-Optimal Massively Parallel Algorithms for Similarity Joins", journal = j-TODS, volume = "44", number = "2", pages = "6:1--6:??", month = apr, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3311967", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 29 10:55:20 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3311967", abstract = "Parallel join algorithms have received much attention in recent years due to the rapid development of massively parallel systems such as MapReduce and Spark. In the database theory community, most efforts have been focused on studying worst-case optimal algorithms. However, the worst-case optimality of these join algorithms relies on the hard instances having very large output sizes. In the case of a two-relation join, the hard instance is just a Cartesian product, with an output size that is quadratic in the input size. In practice, however, the output size is usually much smaller. One recent parallel join algorithm by Beame et al. has achieved output-optimality (i.e., its cost is optimal in terms of both the input size and the output size), but their algorithm only works for a 2-relation equi-join and has some imperfections. In this article, we first improve their algorithm to true optimality. Then we design output-optimal algorithms for a large class of similarity joins. Finally, we present a lower bound, which essentially eliminates the possibility of having output-optimal algorithms for any join on more than two relations.", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Dautrich:2019:IIT, author = "Jonathan L. {Dautrich, Jr.} and Chinya V. Ravishankar", title = "Inferring Insertion Times and Optimizing Error Penalties in Time-decaying {Bloom} Filters", journal = j-TODS, volume = "44", number = "2", pages = "7:1--7:??", month = apr, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3284552", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 29 10:55:20 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3284552", abstract = "Current Bloom Filters tend to ignore Bayesian priors as well as a great deal of useful information they hold, compromising the accuracy of their responses. Incorrect responses cause users to incur penalties that are both application- and item-specific, but current Bloom Filters are typically tuned only for static penalties. Such shortcomings are problematic for all Bloom Filter variants, but especially so for Time-decaying Bloom Filters, in which the memory of older items decays over time, causing both false positives and false negatives. We address these issues by introducing inferential filters, which integrate Bayesian priors and information latent in filters to make penalty-optimal, query-specific decisions. We also show how to properly infer insertion times in such filters. Our methods are general, but here we illustrate their application to inferential time-decaying filters to support novel query types and sliding window queries with dynamic error penalties. We present inferential versions of the Timing Bloom Filter and Generalized Bloom Filter. 
Our experiments on real and synthetic datasets show that our methods reduce penalties for incorrect responses to sliding-window queries in these filters by up to 70\% when penalties are dynamic.", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Gummidi:2019:SSC, author = "Srinivasa Raghavendra Bhuvan Gummidi and Xike Xie and Torben Bach Pedersen", title = "A Survey of Spatial Crowdsourcing", journal = j-TODS, volume = "44", number = "2", pages = "8:1--8:??", month = apr, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3291933", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 29 10:55:20 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3291933", abstract = "Widespread use of advanced mobile devices has led to the emergence of a new class of crowdsourcing called spatial crowdsourcing. Spatial crowdsourcing advances the potential of a crowd to perform tasks related to real-world scenarios involving physical locations, which were not feasible with conventional crowdsourcing methods. The main feature of spatial crowdsourcing is the presence of spatial tasks that require workers to be physically present at a particular location for task fulfillment. Research related to this new paradigm has gained momentum in recent years, necessitating a comprehensive survey to offer a bird's-eye view of the current state of spatial crowdsourcing literature. In this article, we discuss the spatial crowdsourcing infrastructure and identify the fundamental differences between spatial and conventional crowdsourcing. 
Furthermore, we provide a comprehensive view of the existing literature by introducing a taxonomy, elucidate the issues/challenges faced by different components of spatial crowdsourcing, and suggest potential research directions for the future.", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Damme:2019:CES, author = "Patrick Damme and Annett Ungeth{\"u}m and Juliana Hildebrandt and Dirk Habich and Wolfgang Lehner", title = "From a Comprehensive Experimental Survey to a Cost-based Selection Strategy for Lightweight Integer Compression Algorithms", journal = j-TODS, volume = "44", number = "3", pages = "9:1--9:??", month = jun, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3323991", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 29 10:55:21 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3323991", abstract = "Lightweight integer compression algorithms are frequently applied in in-memory database systems to tackle the growing gap between processor speed and main memory bandwidth. In recent years, the vectorization of basic techniques such as delta coding and null suppression has considerably enlarged the corpus of available algorithms. As a result, today there is a large number of algorithms to choose from, while different algorithms are tailored to different data characteristics. However, a comparative evaluation of these algorithms with different data and hardware characteristics has never been sufficiently conducted in the literature. 
To close this gap, we conducted an exhaustive experimental survey by evaluating several state-of-the-art lightweight integer compression algorithms as well as cascades of basic techniques. We systematically investigated the influence of data as well as hardware properties on the performance and the compression rates. The evaluated algorithms are based on publicly available implementations as well as our own vectorized reimplementations. We summarize our experimental findings leading to several new insights and to the conclusion that there is no single-best algorithm. Moreover, in this article, we also introduce and evaluate a novel cost model for the selection of a suitable lightweight integer compression algorithm for a given dataset.", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Bonifati:2019:IMS, author = "Angela Bonifati and Ugo Comignani and Emmanuel Coquery and Romuald Thion", title = "Interactive Mapping Specification with Exemplar Tuples", journal = j-TODS, volume = "44", number = "3", pages = "10:1--10:??", month = jun, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3321485", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 29 10:55:21 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3321485", abstract = "While schema mapping specification is a cumbersome task for data curation specialists, it becomes unfeasible for non-expert users, who are unacquainted with the semantics and languages of the involved transformations. In this article, we present an interactive framework for schema mapping specification suited for non-expert users. 
The underlying key intuition is to leverage a few exemplar tuples to infer the underlying mappings and iterate the inference process via simple user interactions under the form of Boolean queries on the validity of the initial exemplar tuples. The approaches available so far are mainly assuming pairs of complete universal data examples, which can be solely provided by data curation experts, or are limited to poorly expressive mappings. We present a quasi-lattice-based exploration of the space of all possible mappings that satisfy arbitrary user exemplar tuples. Along the exploration, we challenge the user to retain the mappings that fit the user's requirements at best and to dynamically prune the exploration space, thus reducing the number of user interactions. We prove that after the refinement process, the obtained mappings are correct and complete. We present an extensive experimental analysis devoted to measure the feasibility of our interactive mapping strategies and the inherent quality of the obtained mappings.", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Beedkar:2019:UFF, author = "Kaustubh Beedkar and Rainer Gemulla and Wim Martens", title = "A Unified Framework for Frequent Sequence Mining with Subsequence Constraints", journal = j-TODS, volume = "44", number = "3", pages = "11:1--11:??", month = jun, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3321486", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 29 10:55:21 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3321486", abstract = "Frequent sequence mining methods often make use of constraints to control which subsequences should be mined. 
A variety of such subsequence constraints has been studied in the literature, including length, gap, span, regular-expression, and hierarchy constraints. In this article, we show that many subsequence constraints---including and beyond those considered in the literature---can be unified in a single framework. A unified treatment allows researchers to study jointly many types of subsequence constraints (instead of each one individually) and helps to improve usability of pattern mining systems for practitioners. In more detail, we propose a set of simple and intuitive ``pattern expressions'' to describe subsequence constraints and explore algorithms for efficiently mining frequent subsequences under such general constraints. Our algorithms translate pattern expressions to succinct finite-state transducers, which we use as computational model, and simulate these transducers in a way suitable for frequent sequence mining. Our experimental study on real-world datasets indicates that our algorithms---although more general---are efficient and, when used for sequence mining with prior constraints studied in literature, competitive to (and in some cases superior to) state-of-the-art specialized methods.", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans.
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Deutsch:2019:VHA, author = "Alin Deutsch and Yuliang Li and Victor Vianu", title = "Verification of Hierarchical Artifact Systems", journal = j-TODS, volume = "44", number = "3", pages = "12:1--12:??", month = jun, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3321487", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 29 10:55:21 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3321487", abstract = "Data-driven workflows, of which IBM's Business Artifacts are a prime exponent, have been successfully deployed in practice, adopted in industrial standards, and have spawned a rich body of research in academia, focused primarily on static analysis. The present work represents a significant advance on the problem of artifact verification by considering a much richer and more realistic model than in previous work, incorporating core elements of IBM's successful Guard-Stage-Milestone model. In particular, the model features task hierarchy, concurrency, and richer artifact data. It also allows database key and foreign key dependencies, as well as arithmetic constraints. The results show decidability of verification and establish its complexity, making use of novel techniques including a hierarchy of Vector Addition Systems and a variant of quantifier elimination tailored to our context.", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Seidemann:2019:CHP, author = "Marc Seidemann and Nikolaus Glombiewski and Michael K{\"o}rber and Bernhard Seeger", title = "{ChronicleDB}: a High-Performance Event Store", journal = j-TODS, volume = "44", number = "4", pages = "13:1--13:??", month = oct, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3342357", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 29 10:55:21 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3342357", abstract = "Reactive security monitoring, self-driving cars, the Internet of Things (IoT), and many other novel applications require systems for both writing events arriving at very high and fluctuating rates to persistent storage as well as supporting analytical ad hoc queries. As standard database systems are not capable of delivering the required write performance, log-based systems, key-value stores, and other write-optimized data stores have emerged recently. However, the drawbacks of these systems are a fair query performance and the lack of suitable instant recovery mechanisms in case of system failures. In this article, we present ChronicleDB, a novel database system with a storage layout tailored for high write performance under fluctuating data rates and powerful indexing capabilities to support a variety of queries. In addition, ChronicleDB offers low-cost fault tolerance and instant recovery within milliseconds. Unlike previous work, ChronicleDB is designed either as a serverless library to be tightly integrated in an application or as a standalone database server. 
Our results of an experimental evaluation with real and synthetic data reveal that ChronicleDB clearly outperforms competing systems with respect to both write and query performance.", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Brijder:2019:EPQ, author = "Robert Brijder and Floris Geerts and Jan {Van den Bussche} and Timmy Weerwag", title = "On the Expressive Power of Query Languages for Matrices", journal = j-TODS, volume = "44", number = "4", pages = "15:1--15:??", month = oct, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3331445", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 29 10:55:21 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3331445", abstract = "We investigate the expressive power of MATLANG, a formal language for matrix manipulation based on common matrix operations and linear algebra. The language can be extended with the operation inv for inverting a matrix. In MATLANG + inv, we can compute the transitive closure of directed graphs, whereas we show that this is not possible without inversion. Indeed, we show that the basic language can be simulated in the relational algebra with arithmetic operations, grouping, and summation. We also consider an operation eigen for diagonalizing a matrix. It is defined such that for each eigenvalue a set of mutually orthogonal eigenvectors is returned that span the eigenspace of that eigenvalue. We show that inv can be expressed in MATLANG + eigen. We put forward the open question whether there are Boolean queries about matrices, or generic queries about graphs, expressible in MATLANG + eigen but not in MATLANG + inv. 
Finally, the evaluation problem for MATLANG + eigen is shown to be complete for the complexity class $ \exists R $.", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Martens:2019:DES, author = "Wim Martens and Tina Trautner", title = "Dichotomies for Evaluating Simple Regular Path Queries", journal = j-TODS, volume = "44", number = "4", pages = "16:1--16:??", month = oct, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3331446", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 29 10:55:21 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3331446", abstract = "Regular path queries (RPQs) are a central component of graph databases. We investigate decision and enumeration problems concerning the evaluation of RPQs under several semantics that have recently been considered: arbitrary paths, shortest paths, paths without node repetitions (simple paths), and paths without edge repetitions (trails). Whereas arbitrary and shortest paths can be dealt with efficiently, simple paths and trails become computationally difficult already for very small RPQs. We study RPQ evaluation for simple paths and trails from a parameterized complexity perspective and define a class of simple transitive expressions that is prominent in practice and for which we can prove dichotomies for the evaluation problem. We observe that, even though simple path and trail semantics are intractable for RPQs in general, they are feasible for the vast majority of RPQs that are used in practice. 
At the heart of this study is a result of independent interest: the two disjoint paths problem in directed graphs is W[1]-hard if parameterized by the length of one of the two paths.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Wang:2019:EAA, author = "Sibo Wang and Renchi Yang and Runhui Wang and Xiaokui Xiao and Zhewei Wei and Wenqing Lin and Yin Yang and Nan Tang", title = "Efficient Algorithms for Approximate Single-Source Personalized {PageRank} Queries", journal = j-TODS, volume = "44", number = "4", pages = "18:1--18:??", month = oct, year = "2019", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3360902", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Oct 29 10:55:21 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3360902", abstract = "Given a graph G, a source node s, and a target node t, the personalized PageRank ( PPR ) of t with respect to s is the probability that a random walk starting from s terminates at t. An important variant of the PPR query is single-source PPR ( SSPPR ), which enumerates all nodes in G and returns the top- k nodes with the highest PPR values with respect to a given source s. PPR in general and SSPPR in particular have important applications in web search and social networks, e.g., in Twitter's Who-To-Follow recommendation service. However, PPR computation is known to be expensive on large graphs and resistant to indexing. Consequently, previous solutions either use heuristics, which do not guarantee result quality, or rely on the strong computing power of modern data centers, which is costly. 
Motivated by this, we propose effective index-free and index-based algorithms for approximate PPR processing, with rigorous guarantees on result quality. We first present FORA, an approximate SSPPR solution that combines two existing methods---Forward Push (which is fast but does not guarantee quality) and Monte Carlo Random Walk (accurate but slow)---in a simple and yet non-trivial way, leading to both high accuracy and efficiency. Further, FORA includes a simple and effective indexing scheme, as well as a module for top- k selection with high pruning power. Extensive experiments demonstrate that the proposed solutions are orders of magnitude more efficient than their respective competitors. Notably, on a billion-edge Twitter dataset, FORA answers a top-500 approximate SSPPR query within 1s, using a single commodity server.", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J777", } @Article{Jensen:2020:EUE, author = "Christian S. Jensen", title = "Editorial: Updates to the {Editorial Board}", journal = j-TODS, volume = "45", number = "1", pages = "1e:1--1e:1", month = mar, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3381020", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 4 07:13:27 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3381020", acknowledgement = ack-nhfb, articleno = "1e", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{McCamish:2020:GTA, author = "Ben McCamish and Vahid Ghadakchi and Arash Termehchy and Behrouz Touri and Eduardo Cotilla-Sanchez and Liang Huang and Soravit Changpinyo", title = "A Game-theoretic Approach to Data Interaction", journal = j-TODS, volume = "45", number = "1", pages = "1:1--1:44", month = mar, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3351450", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 4 07:13:27 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3351450", abstract = "As most users do not precisely know the structure and/or the content of databases, their queries do not exactly reflect their information needs. The database management system (DBMS) may interact with users and use their feedback on the returned results \ldots{}", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Zhang:2020:FDD, author = "Dan Zhang and Ryan McKenna and Ios Kotsogiannis and George Bissias and Michael Hay and Ashwin Machanavajjhala and Gerome Miklau", title = "{$ \epsilon $KTELO}: a Framework for Defining Differentially Private Computations", journal = j-TODS, volume = "45", number = "1", pages = "2:1--2:44", month = mar, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3362032", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 4 07:13:27 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3362032", abstract = "The adoption of differential privacy is growing, but the complexity of designing private, efficient, and accurate algorithms is still high. 
We propose a novel programming framework and system, $ \epsilon $KTELO for implementing both existing and new privacy \ldots{}", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Florenzano:2020:EEA, author = "Fernando Florenzano and Cristian Riveros and Mart{\'\i}n Ugarte and Stijn Vansummeren and Domagoj Vrgoc", title = "Efficient Enumeration Algorithms for Regular Document Spanners", journal = j-TODS, volume = "45", number = "1", pages = "3:1--3:42", month = mar, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3351451", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 4 07:13:27 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3351451", abstract = "Regular expressions and automata models with capture variables are core tools in rule-based information extraction. These formalisms, also called regular document spanners, use regular languages to locate the data that a user wants to extract from a \ldots{}", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Livshits:2020:COR, author = "Ester Livshits and Benny Kimelfeld and Sudeepa Roy", title = "Computing Optimal Repairs for Functional Dependencies", journal = j-TODS, volume = "45", number = "1", pages = "4:1--4:46", month = mar, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3360904", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 4 07:13:27 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3360904", abstract = "We investigate the complexity of computing an optimal repair of an inconsistent database, in the case where integrity constraints are Functional Dependencies (FDs). We focus on two types of repairs: an optimal subset repair (optimal S-repair), which is \ldots{}", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Zhang:2020:SRF, author = "Huanchen Zhang and Hyeontaek Lim and Viktor Leis and David G. Andersen and Michael Kaminsky and Kimberly Keeton and Andrew Pavlo", title = "Succinct Range Filters", journal = j-TODS, volume = "45", number = "2", pages = "5:1--5:31", month = jul, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3375660", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Jul 19 08:40:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375660", abstract = "We present the Succinct Range Filter (SuRF), a fast and compact data structure for approximate membership tests. 
Unlike traditional Bloom filters, SuRF supports both single-key lookups and common range queries: open-range queries, closed-range queries, \ldots{}", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Fan:2020:AAP, author = "Wenfei Fan and Ping Lu and Wenyuan Yu and Jingbo Xu and Qiang Yin and Xiaojian Luo and Jingren Zhou and Ruochun Jin", title = "Adaptive Asynchronous Parallelization of Graph Algorithms", journal = j-TODS, volume = "45", number = "2", pages = "6:1--6:45", month = jul, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3397491", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Jul 19 08:40:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3397491", abstract = "This article proposes an Adaptive Asynchronous Parallel (AAP) model for graph computations. As opposed to Bulk Synchronous Parallel (BSP) and Asynchronous Parallel (AP) models, AAP reduces both stragglers and stale computations by dynamically adjusting \ldots{}", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Khamis:2020:LMR, author = "Mahmoud Abo Khamis and Hung Q. 
Ngo and Xuanlong Nguyen and Dan Olteanu and Maximilian Schleich", title = "Learning Models over Relational Data Using Sparse Tensors and Functional Dependencies", journal = j-TODS, volume = "45", number = "2", pages = "7:1--7:66", month = jul, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3375661", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Jul 19 08:40:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375661", abstract = "Integrated solutions for analytics over relational databases are of great practical importance as they avoid the costly repeated loop data scientists have to deal with on a daily basis: select features from data residing in relational databases using \ldots{}", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Kolaitis:2020:LNT, author = "Phokion G. Kolaitis and Reinhard Pichler and Emanuel Sallinger and Vadim Savenkov", title = "On the Language of Nested Tuple Generating Dependencies", journal = j-TODS, volume = "45", number = "2", pages = "8:1--8:59", month = jul, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3369554", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Jul 19 08:40:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3369554", abstract = "During the past 15 years, schema mappings have been extensively used in formalizing and studying such critical data interoperability tasks as data exchange and data integration. Much of the work has focused on GLAV mappings, i.e., schema mappings \ldots{}", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Fan:2020:CNI, author = "Wenfei Fan and Xueli Liu and Ping Lu and Chao Tian", title = "Catching Numeric Inconsistencies in Graphs", journal = j-TODS, volume = "45", number = "2", pages = "9:1--9:47", month = jul, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3385031", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Jul 19 08:40:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3385031", abstract = "Numeric inconsistencies are common in real-life knowledge bases and social networks. To catch such errors, we extend graph functional dependencies with linear arithmetic expressions and built-in comparison predicates, referred to as numeric graph \ldots{}", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Jermaine:2020:E, author = "Chris Jermaine", title = "Editorial", journal = j-TODS, volume = "45", number = "3", pages = "10:1--10:1", month = sep, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3417730", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Sep 26 07:22:54 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3417730", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Kara:2020:MTQ, author = "Ahmet Kara and Hung Q. 
Ngo and Milos Nikolic and Dan Olteanu and Haozhe Zhang", title = "Maintaining Triangle Queries under Updates", journal = j-TODS, volume = "45", number = "3", pages = "11:1--11:46", month = sep, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3396375", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Sep 26 07:22:54 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3396375", abstract = "We consider the problem of incrementally maintaining the triangle queries with arbitrary free variables under single-tuple updates to the input relations. We introduce an approach called IVM$\epsilon$ that exhibits a trade-off between the update time, the space, \ldots{}", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Shaikhha:2020:SIL, author = "Amir Shaikhha and Mohammed Elseidy and Stephan Mihaila and Daniel Espino and Christoph Koch", title = "Synthesis of Incremental Linear Algebra Programs", journal = j-TODS, volume = "45", number = "3", pages = "12:1--12:44", month = sep, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3385398", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Sep 26 07:22:54 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3385398", abstract = "This article targets the Incremental View Maintenance (IVM) of sophisticated analytics (such as statistical models, machine learning programs, and graph algorithms) expressed as linear algebra programs. We present LAGO, a unified framework for linear \ldots{}", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Schirmer:2020:EDM, author = "Philipp Schirmer and Thorsten Papenbrock and Ioannis Koumarelas and Felix Naumann", title = "Efficient Discovery of Matching Dependencies", journal = j-TODS, volume = "45", number = "3", pages = "13:1--13:33", month = sep, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3392778", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Sep 26 07:22:54 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3392778", abstract = "Matching dependencies (MDs) are data profiling results that are often used for data integration, data cleaning, and entity matching. They are a generalization of functional dependencies (FDs) matching similar rather than same elements. As their \ldots{}", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Qi:2020:PTS, author = "Jianzhong Qi and Yufei Tao and Yanchuan Chang and Rui Zhang", title = "Packing {$R$}-trees with Space-filling Curves: Theoretical Optimality, Empirical Efficiency, and Bulk-loading Parallelizability", journal = j-TODS, volume = "45", number = "3", pages = "14:1--14:47", month = sep, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3397506", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Sep 26 07:22:54 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3397506", abstract = "The massive amount of data and large variety of data distributions in the big data era call for access methods that are efficient in both query processing and index management, and over both practical and worst-case workloads. 
To address this need, we \ldots{}", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Fan:2020:DGF, author = "Wenfei Fan and Chunming Hu and Xueli Liu and Ping Lu", title = "Discovering Graph Functional Dependencies", journal = j-TODS, volume = "45", number = "3", pages = "15:1--15:42", month = sep, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3397198", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Sep 26 07:22:54 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3397198", abstract = "This article studies discovery of Graph Functional Dependencies (GFDs), a class of functional dependencies defined on graphs. We investigate the fixed-parameter tractability of three fundamental problems related to GFD discovery. We show that the \ldots{}", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Nakandala:2020:IAC, author = "Supun Nakandala and Kabir Nagrecha and Arun Kumar and Yannis Papakonstantinou", title = "Incremental and Approximate Computations for Accelerating Deep {CNN} Inference", journal = j-TODS, volume = "45", number = "4", pages = "16:1--16:42", month = dec, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3397461", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Mar 28 09:41:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3397461", abstract = "Deep learning now offers state-of-the-art accuracy for many prediction tasks. 
A form of deep learning called deep convolutional neural networks (CNNs) are especially popular on image, video, and time series data. Due to its high computational cost, CNN \ldots{}", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Khamis:2020:FAQ, author = "Mahmoud Abo Khamis and Ryan R. Curtin and Benjamin Moseley and Hung Q. Ngo and Xuanlong Nguyen and Dan Olteanu and Maximilian Schleich", title = "Functional Aggregate Queries with Additive Inequalities", journal = j-TODS, volume = "45", number = "4", pages = "17:1--17:41", month = dec, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3426865", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Mar 28 09:41:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3426865", abstract = "Motivated by fundamental applications in databases and relational machine learning, we formulate and study the problem of answering functional aggregate queries (FAQ) in which some of the input factors are defined by a collection of additive \ldots{}", acknowledgement = ack-nhfb, articleno = "17", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Ciaccia:2020:FSD, author = "Paolo Ciaccia and Davide Martinenghi", title = "Flexible Skylines: Dominance for Arbitrary Sets of Monotone Functions", journal = j-TODS, volume = "45", number = "4", pages = "18:1--18:45", month = dec, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3406113", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Mar 28 09:41:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3406113", abstract = "Skyline and ranking queries are two popular, alternative ways of discovering interesting data in large datasets. Skyline queries are simple to specify, as they just return the set of all non-dominated tuples, thereby providing an overall view of \ldots{}", acknowledgement = ack-nhfb, articleno = "18", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Zimanyi:2020:MMD, author = "Esteban Zim{\'a}nyi and Mahmoud Sakr and Arthur Lesuisse", title = "{MobilityDB}: a Mobility Database Based on {PostgreSQL} and {PostGIS}", journal = j-TODS, volume = "45", number = "4", pages = "19:1--19:42", month = dec, year = "2020", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3406534", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Mar 28 09:41:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3406534", abstract = "Despite two decades of research in moving object databases and a few research prototypes that have been proposed, there is not yet a mainstream system targeted for industrial use. 
In this article, we present MobilityDB, a moving object database that \ldots{}", acknowledgement = ack-nhfb, articleno = "19", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Traub:2021:SGE, author = "Jonas Traub and Philipp Marian Grulich and Alejandro Rodr{\'\i}guez Cu{\'e}llar and Sebastian Bre{\ss} and Asterios Katsifodimos and Tilmann Rabl and Volker Markl", title = "{Scotty}: General and Efficient Open-source Window Aggregation for Stream Processing Systems", journal = j-TODS, volume = "46", number = "1", pages = "1:1--1:46", month = apr, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3433675", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Apr 15 14:48:28 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3433675", abstract = "Window aggregation is a core operation in data stream processing. Existing aggregation techniques focus on reducing latency, eliminating redundant computations, or minimizing memory usage. However, each technique operates under different assumptions \ldots{}", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Amarilli:2021:CDE, author = "Antoine Amarilli and Pierre Bourhis and Stefan Mengel and Matthias Niewerth", title = "Constant-Delay Enumeration for Nondeterministic Document Spanners", journal = j-TODS, volume = "46", number = "1", pages = "2:1--2:30", month = apr, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3436487", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Apr 15 14:48:28 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3436487", abstract = "We consider the information extraction framework known as document spanners and study the problem of efficiently computing the results of the extraction from an input document, where the extraction task is described as a sequential variable-set \ldots{}", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Mitchell:2021:ESM, author = "Rory Mitchell and Eibe Frank and Geoffrey Holmes", title = "An Empirical Study of Moment Estimators for Quantile Approximation", journal = j-TODS, volume = "46", number = "1", pages = "3:1--3:21", month = apr, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3442337", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Apr 15 14:48:28 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3442337", abstract = "We empirically evaluate lightweight moment estimators for the single-pass quantile approximation problem, including maximum entropy methods and orthogonal series with Fourier, Cosine, Legendre, Chebyshev and Hermite basis functions. 
We show how to apply \ldots{}", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Meduri:2021:EML, author = "Venkata Vamsikrishna Meduri and Kanchan Chowdhury and Mohamed Sarwat", title = "Evaluation of Machine Learning Algorithms in Predicting the Next {SQL} Query from the Future", journal = j-TODS, volume = "46", number = "1", pages = "4:1--4:46", month = apr, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3442338", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Apr 15 14:48:28 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3442338", abstract = "Prediction of the next SQL query from the user, given her sequence of queries until the current timestep, during an ongoing interaction session of the user with the database, can help in speculative query processing and increased interactivity. While existing machine learning-(ML) based approaches use recommender systems to suggest relevant queries to a user, there has been no exhaustive study on applying temporal predictors to predict the next user issued query.\par In this work, we experimentally compare ML algorithms in predicting the immediate next future query in an interaction workload, given the current user query or the sequence of queries in a user session thus far. As a part of this, we propose the adaptation of two powerful temporal predictors: (a) Recurrent Neural Networks (RNNs) and (b) a Reinforcement Learning approach called Q-Learning that uses Markov Decision Processes. We represent each query as a comprehensive set of fragment embeddings that not only captures the SQL operators, attributes, and relations but also the arithmetic comparison operators and constants that occur in the query. 
Our experiments on two real-world datasets show the effectiveness of temporal predictors against the baseline recommender systems in predicting the structural fragments in a query w.r.t. both quality and time. Besides showing that RNNs can be used to synthesize novel queries, we find that exact Q-Learning outperforms RNNs despite predicting the next query entirely from the historical query logs.", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Carmeli:2021:ECU, author = "Nofar Carmeli and Markus Kr{\"o}ll", title = "On the Enumeration Complexity of Unions of Conjunctive Queries", journal = j-TODS, volume = "46", number = "2", pages = "5:1--5:41", month = jun, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3450263", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Jun 6 07:07:25 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3450263", abstract = "We study the enumeration complexity of Unions of Conjunctive Queries (UCQs). We aim to identify the UCQs that are tractable in the sense that the answer tuples can be enumerated with a linear preprocessing phase and a constant delay between every \ldots{}", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Mhedhbi:2021:OOT, author = "Amine Mhedhbi and Chathura Kankanamge and Semih Salihoglu", title = "Optimizing One-time and Continuous Subgraph Queries using Worst-case Optimal Joins", journal = j-TODS, volume = "46", number = "2", pages = "6:1--6:45", month = jun, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3446980", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Jun 6 07:07:25 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3446980", abstract = "We study the problem of optimizing one-time and continuous subgraph queries using the new worst-case optimal join plans. Worst-case optimal plans evaluate queries by matching one query vertex at a time using multiway intersections. The core problem in \ldots{}", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Wei:2021:EFD, author = "Ziheng Wei and Sebastian Link", title = "Embedded Functional Dependencies and Data-completeness Tailored Database Design", journal = j-TODS, volume = "46", number = "2", pages = "7:1--7:46", month = jun, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3450518", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Jun 6 07:07:25 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3450518", abstract = "We establish a principled schema design framework for data with missing values. 
The framework is based on the new notion of an embedded functional dependency, which is independent of the interpretation of missing values, able to express completeness and \ldots{}", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Chen:2021:GIE, author = "Yangjun Chen and Gagandeep Singh", title = "Graph Indexing for Efficient Evaluation of Label-constrained Reachability Queries", journal = j-TODS, volume = "46", number = "2", pages = "8:1--8:50", month = jun, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3451159", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sun Jun 6 07:07:25 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3451159", abstract = "Given a directed edge labeled graph G, to check whether vertex v is reachable from vertex u under a label set S is to know if there is a path from u to v whose edge labels across the path are a subset of S. Such a query is referred to as a label-. \ldots{}", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Trummer:2021:SRB, author = "Immanuel Trummer and Junxiong Wang and Ziyun Wei and Deepak Maram and Samuel Moseley and Saehan Jo and Joseph Antonakakis and Ankush Rayabhari", title = "{SkinnerDB}: Regret-bounded Query Evaluation via Reinforcement Learning", journal = j-TODS, volume = "46", number = "3", pages = "9:1--9:45", month = sep, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3464389", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Sep 29 06:47:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3464389", abstract = "SkinnerDB uses reinforcement learning for reliable join ordering, exploiting an adaptive processing engine with specialized join algorithms and data structures. It maintains no data statistics and uses no cost or cardinality models. Also, it uses no \ldots{}", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Song:2021:SDC, author = "Shaoxu Song and Fei Gao and Aoqian Zhang and Jianmin Wang and Philip S. Yu", title = "Stream Data Cleaning under Speed and Acceleration Constraints", journal = j-TODS, volume = "46", number = "3", pages = "10:1--10:44", month = sep, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3465740", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Sep 29 06:47:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3465740", abstract = "Stream data are often dirty, for example, owing to unreliable sensor reading or erroneous extraction of stock prices. 
Most stream data cleaning approaches employ a smoothing filter, which may seriously alter the data without preserving the original \ldots{}", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Lin:2021:EBL, author = "Xuelian Lin and Shuai Ma and Jiahao Jiang and Yanchen Hou and Tianyu Wo", title = "Error Bounded Line Simplification Algorithms for Trajectory Compression: an Experimental Evaluation", journal = j-TODS, volume = "46", number = "3", pages = "11:1--11:44", month = sep, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3474373", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Sep 29 06:47:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3474373", abstract = "Nowadays, various sensors are collecting, storing, and transmitting tremendous trajectory data, and it is well known that the storage, network bandwidth, and computing resources could be heavily wasted if raw trajectory data is directly adopted. Line \ldots{}", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Khamis:2021:BQC, author = "Mahmoud Abo Khamis and Phokion G. Kolaitis and Hung Q. 
Ngo and Dan Suciu", title = "Bag Query Containment and Information Theory", journal = j-TODS, volume = "46", number = "3", pages = "12:1--12:39", month = sep, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3472391", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Sep 29 06:47:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3472391", abstract = "The query containment problem is a fundamental algorithmic problem in data management. While this problem is well understood under set semantics, it is by far less understood under bag semantics. In particular, it is a long-standing open question whether \ldots{}", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Ma:2021:DDS, author = "Chenhao Ma and Yixiang Fang and Reynold Cheng and Laks V. S. Lakshmanan and Wenjie Zhang and Xuemin Lin", title = "On Directed Densest Subgraph Discovery", journal = j-TODS, volume = "46", number = "4", pages = "13:1--13:45", month = dec, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3483940", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Dec 10 10:59:16 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3483940", abstract = "Given a directed graph G, the directed densest subgraph (DDS) problem refers to the finding of a subgraph from G, whose density is the highest among all the subgraphs of G. The DDS problem is fundamental to a wide range of applications, such as fraud \ldots{}", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Singh:2021:TRH, author = "Shikha Singh and Prashant Pandey and Michael A. Bender and Jonathan W. Berry and Mart{\'\i}n Farach-Colton and Rob Johnson and Thomas M. Kroeger and Cynthia A. Phillips", title = "Timely Reporting of Heavy Hitters Using External Memory", journal = j-TODS, volume = "46", number = "4", pages = "14:1--14:35", month = dec, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3472392", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Dec 10 10:59:16 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3472392", abstract = "Given an input stream $S$ of size $N$, a $\phi$-heavy hitter is an item that occurs at least $\phi N$ times in $S$. The problem of finding heavy-hitters is extensively studied in the database literature. We study a real-time heavy-hitters variant in which an element must \ldots{}", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Benedikt:2021:BEI, author = "Michael Benedikt and Pierre Bourhis and Louis Jachiet and Efthymia Tsamoura", title = "Balancing Expressiveness and Inexpressiveness in View Design", journal = j-TODS, volume = "46", number = "4", pages = "15:1--15:40", month = dec, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3488370", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Dec 10 10:59:16 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3488370", abstract = "We study the design of data publishing mechanisms that allow a collection of autonomous distributed data sources to collaborate to support queries. 
A common mechanism for data publishing is via views: functions that expose derived data to users, usually \ldots{}", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Grez:2021:FFC, author = "Alejandro Grez and Cristian Riveros and Mart{\'\i}n Ugarte and Stijn Vansummeren", title = "A Formal Framework for Complex Event Recognition", journal = j-TODS, volume = "46", number = "4", pages = "16:1--16:49", month = dec, year = "2021", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3485463", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Dec 10 10:59:16 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3485463", abstract = "Complex event recognition (CER) has emerged as the unifying field for technologies that require processing and correlating distributed data sources in real time. CER finds applications in diverse domains, which has resulted in a large number of proposals \ldots{}", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Berger:2022:SEC, author = "Gerald Berger and Georg Gottlob and Andreas Pieris and Emanuel Sallinger", title = "The Space-Efficient Core of {Vadalog}", journal = j-TODS, volume = "47", number = "1", pages = "1:1--1:46", month = mar, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3488720", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue May 3 06:36:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3488720", abstract = "Vadalog is a system for performing complex reasoning tasks such as those required in advanced knowledge graphs. 
The logical core of the underlying Vadalog language is the warded fragment of tuple-generating dependencies (TGDs). This formalism ensures \ldots{}", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Tong:2022:URP, author = "Yongxin Tong and Yuxiang Zeng and Zimu Zhou and Lei Chen and Ke Xu", title = "Unified Route Planning for Shared Mobility: an Insertion-based Framework", journal = j-TODS, volume = "47", number = "1", pages = "2:1--2:48", month = mar, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3488723", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue May 3 06:36:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3488723", abstract = "There has been a dramatic growth of shared mobility applications such as ride-sharing, food delivery, and crowdsourced parcel delivery. Shared mobility refers to transportation services that are shared among users, where a central issue is route planning. \ldots{}", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Binna:2022:HOT, author = "Robert Binna and Eva Zangerle and Martin Pichl and G{\"u}nther Specht and Viktor Leis", title = "Height Optimized Tries", journal = j-TODS, volume = "47", number = "1", pages = "3:1--3:46", month = mar, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3506692", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue May 3 06:36:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3506692", abstract = "We present the Height Optimized Trie (HOT), a fast and space-efficient in-memory index structure. The core algorithmic idea of HOT is to dynamically vary the number of bits considered at each node, which enables a consistently high fanout and thereby good \ldots{}", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Aumuller:2022:SNN, author = "Martin Aum{\"u}ller and Sariel Har-Peled and Sepideh Mahabadi and Rasmus Pagh and Francesco Silvestri", title = "Sampling a Near Neighbor in High Dimensions --- Who is the Fairest of Them All?", journal = j-TODS, volume = "47", number = "1", pages = "4:1--4:40", month = mar, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3502867", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue May 3 06:36:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3502867", abstract = "Similarity search is a fundamental algorithmic primitive, widely used in many computer science disciplines. 
Given a set of points S and a radius parameter $ r > 0 $, the $r$-near neighbor ($r$-NN) problem asks for a data structure that, given any query point $q$, \ldots{}", acknowledgement = ack-nhfb, articleno = "4", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Schmid:2022:CRP, author = "Markus L. Schmid", title = "Conjunctive Regular Path Queries with Capture Groups", journal = j-TODS, volume = "47", number = "2", pages = "5:1--5:52", month = jun, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3514230", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jun 6 06:55:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3514230", abstract = "In practice, regular expressions are usually extended by so-called capture groups or capture variables, which allow to capture a subexpression by a variable that can be referenced in the regular expression in order to describe repetitions of subwords. We \ldots{}", acknowledgement = ack-nhfb, articleno = "5", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Fan:2022:IGC, author = "Wenfei Fan and Chao Tian", title = "Incremental Graph Computations: Doable and Undoable", journal = j-TODS, volume = "47", number = "2", pages = "6:1--6:44", month = jun, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3500930", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jun 6 06:55:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3500930", acknowledgement = ack-nhfb, articleno = "6", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Cheng:2022:MOP, author = "Ji Cheng and Da Yan and Wenwen Qu and Xiaotian Hao and Cheng Long and Wilfred Ng and Xiaoling Wang", title = "Mining Order-preserving Submatrices under Data Uncertainty: a Possible-world Approach and Efficient Approximation Methods", journal = j-TODS, volume = "47", number = "2", pages = "7:1--7:57", month = jun, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3524915", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jun 6 06:55:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3524915", abstract = "Given a data matrix $D$, a submatrix $S$ of $D$ is an order-preserving submatrix (OPSM) if there is a permutation of the columns of $S$, ! under which the entry values of each row in $S$ are strictly increasing. OPSM mining is \ldots{}", acknowledgement = ack-nhfb, articleno = "7", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Arroyuelo:2022:OJU, author = "Diego Arroyuelo and Gonzalo Navarro and Juan L. Reutter and Javiel Rojas-Ledesma", title = "Optimal Joins Using Compressed Quadtrees", journal = j-TODS, volume = "47", number = "2", pages = "8:1--8:53", month = jun, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3514231", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jun 6 06:55:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3514231", abstract = "Worst-case optimal join algorithms have gained a lot of attention in the database literature. We now count several algorithms that are optimal in the worst case, and many of them have been implemented and validated in practice. 
However, the implementation \ldots{}", acknowledgement = ack-nhfb, articleno = "8", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Carmeli:2022:AUC, author = "Nofar Carmeli and Shai Zeevi and Christoph Berkholz and Alessio Conte and Benny Kimelfeld and Nicole Schweikardt", title = "Answering (Unions of) Conjunctive Queries using Random Access and Random-Order Enumeration", journal = j-TODS, volume = "47", number = "3", pages = "9:1--9:49", month = sep, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3531055", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 19 08:39:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3531055", abstract = "As data analytics becomes more crucial to digital systems, so grows the importance of characterizing the database queries that admit a more efficient evaluation. We consider the tractability yardstick of answer enumeration with a polylogarithmic delay \ldots{}", acknowledgement = ack-nhfb, articleno = "9", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Asudeh:2022:FRR, author = "Abolfazl Asudeh and Gautam Das and H. V. Jagadish and Shangqi Lu and Azade Nazi and Yufei Tao and Nan Zhang and Jianwen Zhao", title = "On Finding Rank Regret Representatives", journal = j-TODS, volume = "47", number = "3", pages = "10:1--10:37", month = sep, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3531054", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 19 08:39:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3531054", abstract = "Selecting the best items in a dataset is a common task in data exploration. 
However, the concept of ``best'' lies in the eyes of the beholder: Different users may consider different attributes more important and, hence, arrive at different rankings. \ldots{}", acknowledgement = ack-nhfb, articleno = "10", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Zeng:2022:PS, author = "Tianjing Zeng and Zhewei Wei and Ge Luo and Ke Yi and Xiaoyong Du and Ji-Rong Wen", title = "Persistent Summaries", journal = j-TODS, volume = "47", number = "3", pages = "11:1--11:42", month = sep, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3531053", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 19 08:39:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3531053", abstract = "A persistent data structure, also known as a multiversion data structure in the database literature, is a data structure that preserves all its previous versions as it is updated over time. Every update (inserting, deleting, or changing a data record) to \ldots{}", acknowledgement = ack-nhfb, articleno = "11", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Guo:2022:IMR, author = "Qintian Guo and Sibo Wang and Zhewei Wei and Wenqing Lin and Jing Tang", title = "Influence Maximization Revisited: Efficient Sampling with Bound Tightened", journal = j-TODS, volume = "47", number = "3", pages = "12:1--12:45", month = sep, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3533817", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Aug 19 08:39:08 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3533817", abstract = "Given a social network G with n nodes and m edges, a positive integer k, and a cascade model C, the influence maximization (IM) problem asks for k nodes in G such that the expected number of nodes influenced by the k nodes under cascade model C is \ldots{}", acknowledgement = ack-nhfb, articleno = "12", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Ketsman:2022:DRL, author = "Bas Ketsman and Christoph Koch and Frank Neven and Brecht Vandevoort", title = "Deciding Robustness for Lower {SQL} Isolation Levels", journal = j-TODS, volume = "47", number = "4", pages = "13:1--13:??", month = dec, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3561049", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Mar 31 10:14:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3561049", abstract = "While serializability always guarantees application correctness, lower isolation levels can be chosen to improve transaction throughput at the risk of introducing certain anomalies. 
A set of transactions is robust against a given isolation level if every \ldots{}", acknowledgement = ack-nhfb, articleno = "13", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{TenCate:2022:CQU, author = "Balder {Ten Cate} and Victor Dalmau", title = "Conjunctive Queries: Unique Characterizations and Exact Learnability", journal = j-TODS, volume = "47", number = "4", pages = "14:1--14:??", month = dec, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3559756", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Mar 31 10:14:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3559756", abstract = "We answer the question of which conjunctive queries are uniquely characterized by polynomially many positive and negative examples and how to construct such examples efficiently. As a consequence, we obtain a new efficient exact learning algorithm for a \ldots{}", acknowledgement = ack-nhfb, articleno = "14", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Wei:2022:PQT, author = "Victor Junqiu Wei and Raymond Chi-Wing Wong and Cheng Long and David Mount and Hanan Samet", title = "Proximity Queries on Terrain Surface", journal = j-TODS, volume = "47", number = "4", pages = "15:1--15:??", month = dec, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3563773", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Mar 31 10:14:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3563773", abstract = "Due to the advance of the geo-spatial positioning and the computer graphics technology, digital terrain data has become increasingly popular nowadays. 
Query processing on terrain data has attracted considerable attention from both the academic and the \ldots{}", acknowledgement = ack-nhfb, articleno = "15", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Do:2022:ESD, author = "Thanh Do and Goetz Graefe and Jeffrey Naughton", title = "Efficient Sorting, Duplicate Removal, Grouping, and Aggregation", journal = j-TODS, volume = "47", number = "4", pages = "16:1--16:??", month = dec, year = "2022", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3568027", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Mar 31 10:14:50 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3568027", abstract = "Database query processing requires algorithms for duplicate removal, grouping, and aggregation. Three algorithms exist: in-stream aggregation is most efficient by far but requires sorted input; sort-based aggregation relies on external merge sort; and hash aggregation relies on an in-memory hash table plus hash partitioning to temporary storage. Cost-based query optimization chooses which algorithm to use based on several factors, including the sort order of the input, input and output sizes, and the need for sorted output. For example, hash-based aggregation is ideal for output smaller than the available memory (e.g., Query 1 of TPC-H), whereas sorting the entire input and aggregating after sorting are preferable when both aggregation input and output are large and the output needs to be sorted for a subsequent operation such as a merge join.\par Unfortunately, the size information required for a sound choice is often inaccurate or unavailable during query optimization, leading to sub-optimal algorithm choices. In response, this article introduces a new algorithm for sort-based duplicate removal, grouping, and aggregation. 
The new algorithm always performs at least as well as both traditional hash-based and traditional sort-based algorithms. It can serve as a system's only aggregation algorithm for unsorted inputs, thus preventing erroneous algorithm choices. Furthermore, the new algorithm produces sorted output that can speed up subsequent operations. Google's F1 Query uses the new algorithm in production workloads that aggregate petabytes of data every day.", acknowledgement = ack-nhfb, articleno = "16", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Carmeli:2023:TOD, author = "Nofar Carmeli and Nikolaos Tziavelis and Wolfgang Gatterbauer and Benny Kimelfeld and Mirek Riedewald", title = "Tractable Orders for Direct Access to Ranked Answers of Conjunctive Queries", journal = j-TODS, volume = "48", number = "1", pages = "1:1--1:??", month = mar, year = "2023", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3578517", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Mar 31 10:14:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3578517", abstract = "We study the question of when we can provide direct access to the k-th answer to a Conjunctive Query (CQ) according to a specified order over the answers in time logarithmic in the size of the database, following a preprocessing step that constructs a \ldots{}", acknowledgement = ack-nhfb, articleno = "1", ajournal = "ACM Trans. 
Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Do:2023:RES, author = "Thanh Do and Goetz Graefe", title = "Robust and Efficient Sorting with Offset-value Coding", journal = j-TODS, volume = "48", number = "1", pages = "2:1--2:??", month = mar, year = "2023", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3570956", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Mar 31 10:14:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3570956", abstract = "Sorting and searching are large parts of database query processing, e.g., in the forms of index creation, index maintenance, and index lookup, and comparing pairs of keys is a substantial part of the effort in sorting and searching. We have worked on \ldots{}", acknowledgement = ack-nhfb, articleno = "2", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Huang:2023:ECS, author = "Ruihong Huang and Jianmin Wang and Shaoxu Song and Xuemin Lin and Xiaochen Zhu and Jian Pei", title = "Efficiently Cleaning Structured Event Logs: a Graph Repair Approach", journal = j-TODS, volume = "48", number = "1", pages = "3:1--3:??", month = mar, year = "2023", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3571281", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Fri Mar 31 10:14:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3571281", abstract = "Event data are often dirty owing to various recording conventions or simply system errors. 
These errors may cause serious damage to real applications, such as inaccurate provenance answers, poor profiling results, or concealing interesting patterns from \ldots{}", acknowledgement = ack-nhfb, articleno = "3", ajournal = "ACM Trans. Database Syst.", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Fakas:2023:PSD, author = "Georgios J. Fakas and Georgios Kalamatianos", title = "Proportionality on Spatial Data with Context", journal = j-TODS, volume = "48", number = "2", pages = "4:1--4:??", month = jun, year = "2023", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3588434", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jul 3 07:30:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3588434", abstract = "More often than not, spatial objects are associated with some context, in the form of text, descriptive tags (e.g., points of interest, flickr photos), or linked entities in semantic graphs (e.g., Yago2, DBpedia). Hence, location-based retrieval should be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "4", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Hu:2023:RDW, author = "Donghui Hu and Qing Wang and Song Yan and Xiaojun Liu and Meng Li and Shuli Zheng", title = "Reversible Database Watermarking Based on Order-preserving Encryption for Data Sharing", journal = j-TODS, volume = "48", number = "2", pages = "5:1--5:??", month = jun, year = "2023", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3589761", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jul 3 07:30:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3589761", abstract = "In the era of big data, data sharing not only boosts the economy of the world but also brings about problems of privacy disclosure and copyright infringement. The collected data may contain users' sensitive information; thus, privacy protection should be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "5", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Chen:2023:EBO, author = "Yaxing Chen and Qinghua Zheng and Zheng Yan", title = "Efficient Bi-objective {SQL} Optimization for Enclaved Cloud Databases with Differentially Private Padding", journal = j-TODS, volume = "48", number = "2", pages = "6:1--6:??", month = jun, year = "2023", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3597021", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Jul 3 07:30:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3597021", abstract = "Hardware-enabled enclaves have been applied to efficiently enforce data security and privacy protection in cloud database services. 
Such enclaved systems, however, are reported to suffer from I/O-size (also referred to as communication-volume)-based side-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "6", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Pavan:2023:MCM, author = "A. Pavan and N. V. Vinodchandran and Arnab Bhattacharyya and Kuldeep S. Meel", title = "Model Counting Meets {$ F_0 $} Estimation", journal = j-TODS, volume = "48", number = "3", pages = "7:1--7:??", month = sep, year = "2023", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3603496", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Sep 18 08:53:16 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3603496", abstract = "Constraint satisfaction problems (CSPs) and data stream models are two powerful abstractions to capture a wide variety of problems arising in different domains of computer science. Developments in the two communities have mostly occurred independently and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "7", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Sarkar:2023:ETP, author = "Subhadeep Sarkar and Tarikul Islam Papon and Dimitris Staratzis and Zichen Zhu and Manos Athanassoulis", title = "Enabling Timely and Persistent Deletion in {LSM}-Engines", journal = j-TODS, volume = "48", number = "3", pages = "8:1--8:??", month = sep, year = "2023", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3599724", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Sep 18 08:53:16 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3599724", abstract = "Data-intensive applications have fueled the evolution of log-structured merge (LSM) based key-value engines that employ the out-of-place paradigm to support high ingestion rates with low read/write interference. These benefits, however, come at the cost \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "8", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Leventidis:2023:DHD, author = "Aristotelis Leventidis and Laura {Di Rocco} and Wolfgang Gatterbauer and Ren{\'e}e J. Miller and Mirek Riedewald", title = "{DomainNet}: Homograph Detection and Understanding in Data Lake Disambiguation", journal = j-TODS, volume = "48", number = "3", pages = "9:1--9:??", month = sep, year = "2023", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3612919", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Sep 18 08:53:16 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3612919", abstract = "Modern data lakes are heterogeneous in the vocabulary that is used to describe data. 
We study a problem of disambiguation in data lakes: How can we determine if a data value occurring more than once in the lake has different meanings and is therefore a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "9", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Shangqi:2023:POM, author = "Shangqi Lu and Wim Martens and Matthias Niewerth and Yufei Tao", title = "Partial Order Multiway Search", journal = j-TODS, volume = "48", number = "4", pages = "10:1--10:??", month = dec, year = "2023", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3626956", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 9 07:07:31 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3626956", abstract = "Partial order multiway search (POMS) is a fundamental problem that finds applications in crowdsourcing, distributed file systems, software testing, and more. This problem involves an interaction between an algorithm A and an oracle, conducted on a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "10", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Herodotou:2023:CBD, author = "Herodotos Herodotou and Elena Kakoulli", title = "Cost-based Data Prefetching and Scheduling in Big Data Platforms over Tiered Storage Systems", journal = j-TODS, volume = "48", number = "4", pages = "11:1--11:??", month = dec, year = "2023", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3625389", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Jan 9 07:07:31 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3625389", abstract = "The use of storage tiering is becoming popular in data-intensive compute clusters due to the recent advancements in storage technologies. The Hadoop Distributed File System, for example, now supports storing data in memory, SSDs, and HDDs, while OctopusFS \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "11", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Gottlob:2024:FPH, author = "Georg Gottlob and Matthias Lanzinger and Cem Okulmus and Reinhard Pichler", title = "Fast Parallel Hypertree Decompositions in Logarithmic Recursion Depth", journal = j-TODS, volume = "49", number = "1", pages = "1:1--1:??", month = mar, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3638758", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 13 07:26:47 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3638758", abstract = "Various classic reasoning problems with natural hypergraph representations are known to be tractable if a hypertree decomposition (HD) of low width exists. 
The resulting algorithms are attractive for practical use in fields like databases and constraint \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "1", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Fan:2024:LEA, author = "Wenfei Fan and Ping Lu and Kehan Pang and Ruochun Jin and Wenyuan Yu", title = "Linking Entities across Relations and Graphs", journal = j-TODS, volume = "49", number = "1", pages = "2:1--2:??", month = mar, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3639363", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 13 07:26:47 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3639363", abstract = "This article proposes a notion of parametric simulation to link entities across a relational database D and a graph G. Taking functions and thresholds for measuring vertex closeness, path associations, and important properties as parameters, parametric \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "2", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Wang:2024:AHT, author = "Zhaoguo Wang and Chuzhe Tang and Xiaodong Zhang and Qianmian Yu and Binyu Zang and Haibing Guan and Haibo Chen", title = "Ad Hoc Transactions through the Looking Glass: an Empirical Study of Application-Level Transactions in {Web} Applications", journal = j-TODS, volume = "49", number = "1", pages = "3:1--3:??", month = mar, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3638553", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 13 07:26:47 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3638553", abstract = "Many transactions in web applications are constructed ad hoc in the application code. For example, developers might explicitly use locking primitives or validation procedures to coordinate critical code fragments. We refer to database operations \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "3", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Currim:2024:IRC, author = "Sabah Currim and Richard T. 
Snodgrass and Young-Kyoon Suh", title = "Identifying the Root Causes of {DBMS} Suboptimality", journal = j-TODS, volume = "49", number = "1", pages = "4:1--4:??", month = mar, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3636425", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Wed Mar 13 07:26:47 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3636425", abstract = "The query optimization phase within a database management system (DBMS) ostensibly finds the fastest query execution plan from a potentially large set of enumerated plans, all of which correctly compute the same result of the specified query. Sometimes \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "4", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Arroyuelo:2024:RWC, author = "Diego Arroyuelo and Adri{\'a}n G{\'o}mez-Brand{\'o}n and Aidan Hogan and Gonzalo Navarro and Juan Reutter and Javiel Rojas-Ledesma and Adri{\'a}n Soto", title = "The Ring: Worst-case Optimal Joins in Graph Databases using (Almost) No Extra Space", journal = j-TODS, volume = "49", number = "2", pages = "5:1--5:??", month = jun, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3644824", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu May 16 10:54:06 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3644824", abstract = "We present an indexing scheme for triple-based graphs that supports join queries in worst-case optimal (wco) time within compact space. This scheme, called a ring, regards each triple as a cyclic string of length 3. Each rotation of the triples is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "5", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Chapman:2024:SBI, author = "Adriane Chapman and Luca Lauro and Paolo Missier and Riccardo Torlone", title = "Supporting Better Insights of Data Science Pipelines with Fine-grained Provenance", journal = j-TODS, volume = "49", number = "2", pages = "6:1--6:??", month = jun, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3644385", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu May 16 10:54:06 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3644385", abstract = "Successful data-driven science requires complex data engineering pipelines to clean, transform, and alter data in preparation for machine learning, and robust results can only be achieved when each step in the pipeline can be justified, and its effect on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "6", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Zhang:2024:SQN, author = "Chao Zhang and Farouk Toumani", title = "Sharing Queries with Nonequivalent User-defined Aggregate Functions", journal = j-TODS, volume = "49", number = "2", pages = "7:1--7:??", month = jun, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3649133", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu May 16 10:54:06 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3649133", abstract = "This article presents Sharing User-Defined Aggregate Function (SUDAF), a declarative framework that allows users to write User-defined Aggregate Functions (UDAFs) as mathematical expressions and use them in Structured Query Language statements. 
SUDAF \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "7", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Carmeli:2024:DRS, author = "Nofar Carmeli and Martin Grohe and Benny Kimelfeld and Ester Livshits and Muhammad Tibi", title = "Database Repairing with Soft Functional Dependencies", journal = j-TODS, volume = "49", number = "2", pages = "8:1--8:??", month = jun, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3651156", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu May 16 10:54:06 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3651156", abstract = "A common interpretation of soft constraints penalizes the database for every violation of every constraint, where the penalty is the cost (weight) of the constraint. A computational challenge is that of finding an optimal subset: a collection of database \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "8", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Tench:2024:GHF, author = "David Tench and Evan West and Victor Zhang and Michael A. Bender and Abiyaz Chowdhury and Daniel Delayo and J. 
Ahmed Dellas and Mart{\'\i}n Farach-Colton and Tyler Seip and Kenny Zhang", title = "{GraphZeppelin}: How to Find Connected Components (Even When Graphs Are Dense, Dynamic, and Massive)", journal = j-TODS, volume = "49", number = "3", pages = "9:1--9:??", month = sep, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3643846", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 24 06:31:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3643846", abstract = "Finding the connected components of a graph is a fundamental problem with uses throughout computer science and engineering. The task of computing connected components becomes more difficult when graphs are very large, or when they are dynamic, meaning the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "9", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Salas:2024:FCM, author = "Jorge Salas and Cristian Riveros and Sebasti{\'a}n Bugedo", title = "A Family of Centrality Measures for Graph Data Based on Subgraphs", journal = j-TODS, volume = "49", number = "3", pages = "10:1--10:??", month = sep, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3649134", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 24 06:31:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3649134", abstract = "We present the theoretical foundations and first experimental study of a new approach in centrality measures for graph data. The main principle is straightforward: the more relevant subgraphs around a vertex, the more central it is in the network. We \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "10", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Gershtein:2024:ACT, author = "Shay Gershtein and Uri Avron and Ido Guy and Tova Milo and Slava Novgorodov", title = "Automated Category Tree Construction: Hardness Bounds and Algorithms", journal = j-TODS, volume = "49", number = "3", pages = "11:1--11:??", month = sep, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3664283", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 24 06:31:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3664283", abstract = "Category trees, or taxonomies, are rooted trees where each node, called a category, corresponds to a set of related items. The construction of taxonomies has been studied in various domains, including e-commerce, document management, and question \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "11", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Hu:2024:ASB, author = "Pan Hu and Boris Motik", title = "Accurate Sampling-Based Cardinality Estimation for Complex Graph Queries", journal = j-TODS, volume = "49", number = "3", pages = "12:1--12:??", month = sep, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3689209", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Sep 24 06:31:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3689209", abstract = "Accurately estimating the cardinality (i.e., the number of answers) of complex queries plays a central role in database systems. This problem is particularly difficult in graph databases, where queries often involve a large number of joins and self-joins. 
\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "12", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Dong:2024:IOT, author = "Wei Dong and Juanru Fang and Ke Yi and Yuchao Tao and Ashwin Machanavajjhala", title = "Instance-optimal Truncation for Differentially Private Query Evaluation with Foreign Keys", journal = j-TODS, volume = "49", number = "4", pages = "13:1--13:??", month = dec, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3697831", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 17 10:47:49 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3697831", abstract = "Answering SPJA queries under differential privacy (DP), including graph pattern counting under node-DP as an important special case, has received considerable attention in recent years. The dual challenge of foreign-key constraints combined with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "13", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Benedikt:2024:RIC, author = "Michael Benedikt and Maxime Buron and Stefano Germano and Kevin Kappelmann and Boris Motik", title = "Rewriting the Infinite Chase for Guarded {TGDs}", journal = j-TODS, volume = "49", number = "4", pages = "14:1--14:??", month = dec, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3696416", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 17 10:47:49 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3696416", abstract = "Guarded tuple-generating dependencies (GTGDs) are a natural extension of description logics and referential constraints. 
It has long been known that queries over GTGDs can be answered by a variant of the chase -a quintessential technique for reasoning with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "14", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Munoz:2024:SEN, author = "Mart{\'\i}n Mu{\~n}oz and Cristian Riveros", title = "Streaming Enumeration on Nested Documents", journal = j-TODS, volume = "49", number = "4", pages = "15:1--15:??", month = dec, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3701557", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 17 10:47:49 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3701557", abstract = "Some of the most relevant document schemas used online, such as XML and JSON, have a nested format. In the past decade, the task of extracting data from nested documents over streams has become especially relevant. We focus on the streaming evaluation of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "15", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Fan:2024:MIT, author = "Wenfei Fan and Kehan Pang and Ping Lu and Chao Tian", title = "Making It Tractable to Detect and Correct Errors in Graphs", journal = j-TODS, volume = "49", number = "4", pages = "16:1--16:??", month = dec, year = "2024", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3702315", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Dec 17 10:47:49 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3702315", abstract = "This article develops Hercules, a system for entity resolution (ER), conflict resolution (CR), timeliness deduction (TD), and missing value/link imputation (MI) in graphs. It proposes GCR$^+$ s, a class of graph cleaning rules (GCR) that support not only \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "16", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Bringmann:2025:TFG, author = "Karl Bringmann and Nofar Carmeli and Stefan Mengel", title = "Tight Fine-Grained Bounds for Direct Access on Join Queries", journal = j-TODS, volume = "50", number = "1", pages = "1:1--1:??", month = mar, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3707448", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 10 14:52:49 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3707448", abstract = "We consider the task of lexicographic direct access to query answers. That is, we want to simulate an array containing the answers of a join query sorted in a lexicographic order chosen by the user. 
A recent dichotomy showed for which queries and orders \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "1", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Mouratidis:2025:MTS, author = "Kyriakos Mouratidis and Keming Li and Bo Tang", title = "Marrying Top-$k$ with Skyline Queries: Operators with Relaxed Preference Input and Controllable Output Size", journal = j-TODS, volume = "50", number = "1", pages = "2:1--2:??", month = mar, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3705726", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 10 14:52:49 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3705726", abstract = "The two paradigms to identify records of preference in a multi-objective setting rely either on dominance (e.g., the skyline operator) or on a utility function defined over the records' attributes (typically using a top- k query). Despite their \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "2", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Li:2025:CBF, author = "Yiming Li and Yanyan Shen and Lei Chen and Mingxuan Yuan", title = "A Caching-based Framework for Scalable Temporal Graph Neural Network Training", journal = j-TODS, volume = "50", number = "1", pages = "3:1--3:??", month = mar, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3705894", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 10 14:52:49 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3705894", abstract = "Representation learning over dynamic graphs is critical for many real-world applications such as social network services and recommender systems. Temporal graph neural networks (T-GNNs) are powerful representation learning methods and have demonstrated \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "3", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Jasny:2025:SDD, author = "Matthias Jasny and Tobias Ziegler and Jacob Nelson-Slivon and Viktor Leis and Carsten Binnig", title = "Synchronizing Disaggregated Data Structures with One-Sided {RDMA}: Pitfalls, Experiments and Design Guidelines", journal = j-TODS, volume = "50", number = "1", pages = "4:1--4:??", month = mar, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3716377", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Mon Mar 10 14:52:49 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "https://dl.acm.org/doi/10.1145/3716377", abstract = "Remote data structures built with one-sided Remote Direct Memory Access (RDMA) are at the heart of many disaggregated database management systems today. 
Concurrent access to these data structures by thousands of remote workers necessitates a highly \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "4", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Metwally:2025:SLB, author = "Ahmed Metwally", title = "Scaling and Load-Balancing Equi-Joins", journal = j-TODS, volume = "50", number = "2", pages = "5:1--5:??", month = jun, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3722102", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat May 24 12:14:19 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The task of joining two tables is fundamental for querying databases. In this article, we focus on the equi-join problem, where a pair of records from the two joined tables are part of the join results if equality holds between their values in the join \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "5", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Sun:2025:PTD, author = "Zitan Sun and Xin Huang and Jianliang Xu and Francesco Bonchi and Lijun Chang", title = "Probabilistic Truss Decomposition on Uncertain Graphs: Indexing and Dynamic Maintenance", journal = j-TODS, volume = "50", number = "2", pages = "6:1--6:??", month = jun, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3721428", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat May 24 12:14:19 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Networks in many real-world applications come with an inherent uncertainty in their structure, due to, for example, noisy measurements, inference and prediction models, or for privacy purposes. 
Modeling and analyzing uncertain graphs have attracted a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "6", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Wang:2025:AIT, author = "Chen Wang and Jialin Qiao and Xiangdong Huang and Shaoxu Song and Haonan Hou and Tian Jiang and Lei Rui and Jianmin Wang and Jiaguang Sun", title = "{Apache IoTDB}: a Time Series Database for Large Scale {IoT} Applications", journal = j-TODS, volume = "50", number = "2", pages = "7:1--7:??", month = jun, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3726523", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat May 24 12:14:19 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A typical industrial scenario encounters thousands of devices with millions of sensors, consistently generating billions of data points. It poses new requirements of time series data management, not well addressed in existing solutions, including (1) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "7", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Vandevoort:2025:AIL, author = "Brecht Vandevoort and Bas Ketsman and Frank Neven", title = "Allocating Isolation Levels to Transactions in a Multiversion Setting", journal = j-TODS, volume = "50", number = "2", pages = "8:1--8:??", month = jun, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3716374", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat May 24 12:14:19 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "A serializable concurrency control mechanism ensures consistency for OLTP systems at the expense of a reduced transaction throughput. 
A DBMS, therefore usually offers the possibility to allocate lower isolation levels for some transactions when it is safe \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "8", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Lebeda:2025:BDP, author = "Christian Janos Lebeda and Jakub T{\v{e}}tek", title = "Better Differentially Private Approximate Histograms and Heavy Hitters using the {Misra--Gries} Sketch", journal = j-TODS, volume = "50", number = "3", pages = "9:1--9:??", month = sep, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3716375", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 3 07:01:03 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We consider the problem of computing differentially private approximate histograms and heavy hitters in a stream of elements. In the non-private setting, this is often done using the sketch of Misra and Gries [Science of Computer Programming, 1982]. Chan, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "9", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Chai:2025:CEM, author = "Chengliang Chai and Kaisen Jin and Nan Tang and Ju Fan and Dongjing Miao and Jiayi Wang and Yuyu Luo and Guoliang Li and Ye Yuan and Guoren Wang", title = "Cost-effective Missing Value Imputation for Data-effective Machine Learning", journal = j-TODS, volume = "50", number = "3", pages = "10:1--10:??", month = sep, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3716376", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 3 07:01:03 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Given a dataset with incomplete data (e.g., missing values), training a machine learning model over the incomplete data requires two steps. First, it requires a data-effective step that cleans the data in order to improve the data quality (and the model \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "10", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{John:2025:HDD, author = "Sachin Basil John and Christoph Koch and Peter Lindner", title = "High-dimensional Data Cubes", journal = j-TODS, volume = "50", number = "3", pages = "11:1--11:??", month = sep, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3716373", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 3 07:01:03 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We introduce an approach to supporting high-dimensional data cubes at interactive query speeds and moderate storage cost. Our approach is based on binary(-domain) data cubes that are judiciously partially materialized; the missing information can be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "11", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Aamer:2025:EWS, author = "Heba Aamer and Jan Hidders and Jan Paredaens and Jan {Van den Bussche}", title = "Expressiveness within Sequence {Datalog}", journal = j-TODS, volume = "50", number = "3", pages = "12:1--12:??", month = sep, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3732283", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Thu Jul 3 07:01:03 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Motivated by old and new applications, we investigate Datalog as a language for sequence databases. We reconsider classical features of Datalog programs, such as negation, recursion, intermediate predicates, and relations of higher arities. We also \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "12", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Tao:2025:EMN, author = "Yufei Tao", title = "Editorial: a Message from the New {Editor-in-Chief}", journal = j-TODS, volume = "50", number = "4", pages = "13:1--13:??", month = dec, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3736110", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Feb 7 07:06:43 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "13", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Xie:2025:GBK, author = "Jiadong Xie and Jeffrey Xu Yu and Yingfan Liu", title = "Graph Based {$K$}-Nearest Neighbor Search Revisited", journal = j-TODS, volume = "50", number = "4", pages = "14:1--14:30", month = dec, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3736716", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Feb 7 07:06:43 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The problem of k -nearest neighbor ( k -NN) search is a fundamental problem to find the exact k nearest neighbor points for a user-given query point q in a d -dimensional large dataset D with n points, and the approximate k -NN ( k -ANN) search problem is to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "14", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Wei:2025:BCN, author = "Ziheng Wei and Sebastian Link", title = "The Bounded Cardinality Normal Form for the Logical Design of Relational Database Schemata", journal = j-TODS, volume = "50", number = "4", pages = "15:1--15:45", month = dec, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3744897", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Feb 7 07:06:43 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The goal of classical normalization is to maintain data consistency under updates, with a minimum level of effort. Given functional dependencies (FDs) alone, this goal is only achievable in the special case an FD-preserving Boyce-Codd Normal Form (BCNF) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "15", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Ji:2025:EPB, author = "Shuping Ji and Jianguo Yao and Wei Wang and Jun Wei and Hans-Arno Jacobsen", title = "Efficient Parallel {Boolean} Expression Matching", journal = j-TODS, volume = "50", number = "4", pages = "16:1--16:41", month = dec, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3736756", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Feb 7 07:06:43 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Boolean expression matching plays an important role in many applications. However, existing solutions still show efficiency and scalability limitations. For example, existing solutions often exhibit degraded performance when applied to high-dimensional \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "16", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Zhao:2025:STT, author = "Hangdong Zhao and Shaleen Deep and Paraschos Koutris", title = "Space-Time Tradeoffs for Conjunctive Queries with Access Patterns", journal = j-TODS, volume = "50", number = "4", pages = "17:1--17:45", month = dec, year = "2025", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3743130", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Feb 7 07:06:43 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In this article, we investigate space-time tradeoffs for answering conjunctive queries with access patterns (CQAPs). The goal is to create a space-efficient data structure in an initial preprocessing phase and use it for answering (multiple) queries in an \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Database Syst.", articleno = "17", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Tziavelis:2026:KAE, author = "Nikolaos Tziavelis and Wolfgang Gatterbauer and Mirek Riedewald", title = "Any-{$k$} Algorithms for Enumerating Ranked Answers to Conjunctive Queries", journal = j-TODS, volume = "51", number = "1", pages = "1:1--1:47", month = mar, year = "2026", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3734517", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Feb 7 07:06:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We study ranked enumeration for Conjunctive Queries (CQs) where the answers are ordered by a given ranking function (e.g., an ORDER BY clause in SQL). We develop `` any-k '' algorithms, which, without knowing the number k of desired answers, push down the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "1", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Tong:2026:BBA, author = "Yulai Tong and Fengrui Liu and Jinhui Xu and Hua Wang and Ke Zhou and JiaLe Miao and Cheng Wang and Rongfeng He", title = "{BISLearner}: Block-Aware Index Selection using Attention-Based Reinforcement Learning for Data Analytics", journal = j-TODS, volume = "51", number = "1", pages = "2:1--2:32", month = mar, year = "2026", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3760773", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Feb 7 07:06:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The development of data analytics services has fueled many optimizations in data scans, and indexes are one of the most important techniques to improve scan efficiency. 
Meanwhile, block-based data organization has become standard practice in these \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "2", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Tang:2026:ULB, author = "Xiu Tang and Shijie Yang and Sai Wu and Dongxiang Zhang and Wenchao Zhou and Feifei Li and Gang Chen", title = "Unveiling Logic Bugs in {SPJG} Query Optimizations within {DBMS}", journal = j-TODS, volume = "51", number = "1", pages = "3:1--3:35", month = mar, year = "2026", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3764583", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Feb 7 07:06:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Generation-based testing techniques have proven effective in detecting logic bugs in DBMS, often stemming from the improper implementation of query optimizers. However, existing generation-based debugging tools predominantly rely on random testing, which \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "3", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Deeds:2026:DSB, author = "Kyle Deeds and Dan Suciu and Magdalena Balazinska and Walter Cai", title = "Degree Sequence Bounds", journal = j-TODS, volume = "51", number = "1", pages = "4:1--4:27", month = mar, year = "2026", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3716378", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Feb 7 07:06:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Recent work has demonstrated the catastrophic effects of poor cardinality estimates on query processing time. 
In particular, underestimating query cardinality can result in overly optimistic query plans which take orders of magnitude longer to complete \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "4", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Yan:2026:EPO, author = "Yinzhao Yan and Raymond Chi-Wing Wong", title = "Efficient Path Oracles for Proximity Queries on Point Clouds", journal = j-TODS, volume = "51", number = "1", pages = "5:1--5:42", month = mar, year = "2026", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3770577", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Feb 7 07:06:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The prevalence of computer graphics technology boosts the development of point clouds, which offer advantages over Triangular Irregular Networks, i.e., TIN s, in proximity queries. All existing on-the-fly shortest path query algorithms and oracles on a TIN \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "5", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Zhu:2026:GQD, author = "Xuliang Zhu and Xin Huang and Kai Wang and Jianliang Xu and Xuemin Lin", title = "From Global to Query-Dependent: Summarization of Large Hierarchical {DAGs}", journal = j-TODS, volume = "51", number = "1", pages = "6:1--6:31", month = mar, year = "2026", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3769079", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Sat Feb 7 07:06:44 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Hierarchical directed acyclic graph (DAG) is an essential model for representing terminologies and their hierarchical relationships, such as Disease Ontology and ImageNet categories. 
Due to the vast number of terminologies and complex structures in a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "6", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Wang:2026:CQC, author = "Qichen Wang and Ke Yi", title = "Conjunctive Queries with Comparisons", journal = j-TODS, volume = "51", number = "2", pages = "7:1--7:37", month = jun, year = "2026", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3769424", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Mar 17 14:52:28 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Conjunctive queries with predicates in the form of comparisons that span multiple relations have regained interest recently, due to their relevance in OLAP queries, spatiotemporal databases, and machine learning over relational data. The standard \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "7", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Siddiqi:2026:SSF, author = "Shafaq Siddiqi and Arnab Phani and Roman Kern and Matthias Boehm", title = "{Saga++}: a Scalable Framework for Optimizing Data Cleaning Pipelines for Machine Learning Applications", journal = j-TODS, volume = "51", number = "2", pages = "8:1--8:33", month = jun, year = "2026", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3771766", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Mar 17 14:52:28 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "In the exploratory data science lifecycle, data scientists often spent the majority of their time finding, integrating, validating, and cleaning relevant datasets. 
Despite recent work on data validation, and numerous error detection and correction \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "8", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Dai:2026:RSJ, author = "Binyang Dai and Xiao Hu and Ke Yi", title = "Reservoir Sampling over Joins", journal = j-TODS, volume = "51", number = "2", pages = "9:1--9:35", month = jun, year = "2026", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3787855", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Mar 17 14:52:28 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Sampling over joins is a fundamental task in large-scale data analytics. Instead of computing the full join results, which could be massive, a uniform sample of the join results would suffice for many purposes, such as answering analytical queries or \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "9", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Yuan:2026:FSD, author = "Lyuheng Yuan and Da Yan and Dingwen Tao and Jiao Han and Saugat Adhikari and Cheng Long and Yang Zhou", title = "{T-FSM}: a Scalable Distributed Task-Based System for Frequent Subgraph Pattern Mining from a Big Graph", journal = j-TODS, volume = "51", number = "2", pages = "10:1--10:54", month = jun, year = "2026", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3771994", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Mar 17 14:52:28 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "Finding frequent subgraph patterns in a big graph is an important problem with many applications such as classifying chemical compounds and building indexes to speed up graph queries. 
Since this problem is NP-hard, some recent parallel and distributed \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "10", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Bernhardt:2026:UNO, author = "Arthur Bernhardt and Sajjad Tamimi and Florian Stock and Andreas Koch and Ilia Petrov", title = "Update {NDP}: On Offloading Modifications to Smart Storage with Transactional Guarantees in Near-Data Processing {DBMS}", journal = j-TODS, volume = "51", number = "2", pages = "11:1--11:45", month = jun, year = "2026", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3774753", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Mar 17 14:52:28 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "The performance and scalability of modern data-intensive systems processing large datasets are limited by unnecessary data movement. Even though near-data processing (NDP) can provably reduce data transfers and increase performance, at present, NDP is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "11", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } @Article{Hu:2026:TJS, author = "Zeyuan Hu and Yisu Remy Wang and Daniel P. Miranker", title = "{TreeTracker} Join: Simple, Optimal, Fast", journal = j-TODS, volume = "51", number = "2", pages = "12:1--12:26", month = jun, year = "2026", CODEN = "ATDSD3", DOI = "https://doi.org/10.1145/3774325", ISSN = "0362-5915 (print), 1557-4644 (electronic)", ISSN-L = "0362-5915", bibdate = "Tue Mar 17 14:52:28 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "We present a novel linear-time acyclic join algorithm, TreeTracker Join (TTJ). 
The algorithm can be understood as the pipelined binary hash join with a simple twist: upon a hash lookup failure, TTJ resets execution to the binding of the tuple causing the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Database Syst.", articleno = "12", fjournal = "ACM Transactions on Database Systems", journal-URL = "https://dl.acm.org/loi/tods", } %%% ==================================================================== %%% Cross-referenced entries must come last: @InProceedings{Litwin:1980:LHN, author = "W. Litwin", title = "Linear Hashing: a New Tool for File and Table Addressing", crossref = "Lochovsky:1980:SIC", pages = "212--223", year = "1980", bibdate = "Tue Jul 19 00:55:06 1994", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Reprinted in \cite[p.~570--581]{Stonebraker:1988:RDS}.", annote = "On excess collisions create new bucket and modify hash. Leads to constant performance.", } @Article{Regnier:1985:AGF, author = "Mireille Regnier", title = "Analysis of Grid File Algorithms", journal = j-BIT, volume = "25", number = "2", pages = "335--357", month = jun, year = "1985", CODEN = "BITTEL, NBITAB", DOI = "https://doi.org/10.1007/BF01934379", ISSN = "0006-3835 (print), 1572-9125 (electronic)", ISSN-L = "0006-3835", MRclass = "68P10", MRnumber = "86m:68019", bibdate = "Wed Jan 4 18:52:19 MST 2006", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/bit.bib; https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tods.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0006-3835&volume=25&issue=2&spage=335", abstract = "Grid File is a generic name for geometric algorithms used to map multiple-key indices onto primary files or databases. Two existing methods by other authors \cite{Fagin:1979:EHF,Larson:1978:DH} are generalized to the multidimensional environment. 
The presence of a hashing function is the primary difference between the two algorithms; it is recommended to provide uniformity when presented with biased key distributions.", acknowledgement = ack-nhfb, affiliationaddress = "Inst Natl de Recherche en Informatique et en Automatique, Chesnay, Fr", classification = "723", fjournal = "BIT (Nordisk tidskrift for informationsbehandling)", journal-URL = "http://link.springer.com/journal/10543", journalabr = "BIT (Copenhagen)", keywords = "computer programming --- Algorithms; data processing; database systems; dynamic data structures; File Organization; grid file algorithms; hashing; multi-key access", } @Proceedings{Kerr:1975:PIC, editor = "Douglas S. Kerr", booktitle = "{Proceedings of the International Conference on Very Large Data Bases, Framingham, MA, USA, September 22--24, 1975}", title = "{Proceedings of the International Conference on Very Large Data Bases, Framingham, MA, USA, September 22--24, 1975}", publisher = pub-ACM, address = pub-ACM:adr, pages = "viii + 592", year = "1975", ISSN = "0278-2596", LCCN = "QA76.9.D3 I55 1975", bibdate = "Fri Sep 16 12:12:29 1994", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "ACM SIGMOD v. 1, no. 1, September 1975.", price = "US\$15.00", acknowledgement = ack-nhfb, } @Proceedings{Lochovsky:1980:SIC, editor = "Frederick H. Lochovsky and ?. Taylor", key = "VLDB'80", booktitle = "{Sixth International Conference on Very Large Data Bases: reprinted from Very large data bases}", title = "{Sixth International Conference on Very Large Data Bases: reprinted from Very large data bases}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "ix + 435", year = "1980", bibdate = "Wed Jul 20 12:37:29 1994", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "IEEE Catalog Number 80CH1534-7C. ACM Order Number 471800. Long Beach Order Number 322.", acknowledgement = ack-nhfb, } @Proceedings{Schkolnick:1983:ICV, editor = "Mario Schkolnick and C. 
(Costantino) Thanos", booktitle = "{9th International Conference on Very Large Data Bases (Florence, Italy, October 31--November 2, 1983)}", title = "{9th International Conference on Very Large Data Bases (Florence, Italy, October 31--November 2, 1983)}", publisher = "VLDB Endowment", address = "P.O. Box 2245, Saratoga, CA, USA", pages = "xiii + 416", year = "1983", ISBN = "0-934613-15-X", ISBN-13 = "978-0-934613-15-6", LCCN = "QA 76.9 D3 I61 1983", bibdate = "Sat Dec 7 13:05:35 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "This conference is sponsored by VLDB Endowment and co-sponsored by IFIP et al.", acknowledgement = ack-nhfb, keywords = "database management -- congresses", } @Proceedings{ACM:1985:PFA, editor = "{ACM}", key = "ACM-PODS'85", booktitle = "{Proceedings of the Fourth ACM SIGACT-SIGMOD Symposium on Principles of Database Systems, March 25--27, 1985, Portland, Oregon}", title = "{Proceedings of the Fourth ACM SIGACT-SIGMOD Symposium on Principles of Database Systems, March 25--27, 1985, Portland, Oregon}", publisher = pub-ACM, address = pub-ACM:adr, pages = "275", year = "1985", ISBN = "0-89791-153-9", ISBN-13 = "978-0-89791-153-5", LCCN = "QA 76.9 D3 A296 1985", bibdate = "Sat Sep 17 10:24:09 1994", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", } @Book{Kambayashi:1986:TIC, editor = "Yahiko Kambayashi and Wesley Chu and Georges Gardarin and Setsuo Ohsuga", booktitle = "{Twelfth International Conference on Very Large Data Bases, Proceedings (VLDB '86)}", title = "{Twelfth International Conference on Very Large Data Bases, Proceedings (VLDB '86)}", publisher = pub-MORGAN-KAUFMANN, address = pub-MORGAN-KAUFMANN:adr, pages = "xiii + 512", year = "1986", ISBN = "0-934613-18-4", ISBN-13 = "978-0-934613-18-7", LCCN = "QA 76.9 D3 I61 1986", bibdate = "Tue Nov 10 07:59:52 1998", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/tods.bib", abstract = "This conference proceeding contains 54 
papers, 3 of them are in abstract form only. The purpose of these papers is the promotion of an understanding of current research, and the exchange of ideas, experiences and future directions in database systems. The main subjects are office systems, database machines, computer aided design\slash computer aided manufacturing, query processing, relational operation optimization, extended data models, historical database systems, distributed database systems, knowledge-based systems and managing of unformatted data, such as text and graphical images.", acknowledgement = ack-nhfb, classification = "723; 903; 912", conference = "Twelfth International Conference on Very Large Data Bases, Proceedings (VLDB '86)", conferenceyear = "1986", keywords = "Artificial Intelligence --- Applications; Computer Metatheory --- Formal Logic; Data Processing, Business --- Data Description; Database Integrity; Database Systems; Information Retrieval Systems --- Evaluation; Knowledge Base Systems; Logic Programming; Management --- Information Systems; Query Processing; Research", meetingabr = "Twelfth Int Conf Very Large Data Bases Proc VLDB 86", meetingaddress = "Kyoto, Jpn", meetingdate = "Aug 25--28 1986", meetingdate2 = "08/25--28/86", sponsor = "VLDB Endowment, Jpn; IFIP; INRIA; Information Processing Soc of Japan; DARPA; et al", } @Book{Stonebraker:1988:RDS, editor = "Michael Stonebraker", booktitle = "Readings in Database Systems", title = "Readings in Database Systems", publisher = pub-MORGAN-KAUFMANN, address = pub-MORGAN-KAUFMANN:adr, pages = "xii + 644", year = "1988", ISBN = "0-934613-65-6", ISBN-13 = "978-0-934613-65-1", LCCN = "QA76.9.D3 R4 1988", bibdate = "Tue Jul 19 00:53:02 1994", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", note = "Contains reprint of \cite{Litwin:1980:LHN}.", price = "US\$29.95", abstract = "The purpose of this collection is to assemble significant research contributions so they are easily access-ible to anyone interested in database 
research. It is appropriate for use as an introduction for students or professionals from industry, and as a reference volume to anyone active in database systems \ldots. It is intended to serve as a core of material that any DBMS professional should be familiar with. Moreover, any industrial practitioner or graduate student who wishes to be current on the important research themes would be well advised to read these papers.", acknowledgement = ack-nhfb, bookpages = "xii + 644", } @Proceedings{Garcia-Molina:1990:PAS, editor = "H{\'e}ctor Garc{\'\i}a-Molina and H. V. Jagadish", booktitle = "{Proceedings of the 1990 ACM SIGMOD International Conference on Management of Data, May 23--25, 1990, Atlantic City, NJ}", title = "{Proceedings of the 1990 ACM SIGMOD International Conference on Management of Data, May 23--25, 1990, Atlantic City, NJ}", volume = "19(2)", publisher = pub-ACM, address = pub-ACM:adr, pages = "xii + 388", month = jun, year = "1990", ISBN = "0-89791-365-5", ISBN-13 = "978-0-89791-365-2", LCCN = "QA 76.9 D3 S53 v.19 no.2 1990", bibdate = "Mon Dec 09 07:53:58 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", series = "SIGMOD Record", acknowledgement = ack-nhfb, } @Book{Zdonik:1990:ROO, editor = "Stanley B. Zdonik and David Maier", booktitle = "Readings in Object-Oriented Database Systems", title = "Readings in Object-Oriented Database Systems", publisher = pub-MORGAN-KAUFMANN, address = pub-MORGAN-KAUFMANN:adr, pages = "x + 629", year = "1990", ISBN = "1-55860-000-0", ISBN-13 = "978-1-55860-000-3", LCCN = "QA76.9.D3 R42 1990", bibdate = "Mon Dec 09 07:31:31 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/tods.bib", price = "US\$29.95", series = "Morgan Kaufmann series in data management systems", acknowledgement = ack-nhfb, }