@comment{{This file has been generated by bib2bib 1.92}}

@comment{{Command line: C:\alldata\data\webbib\bib2bib-1.92.exe -oc books -ob books.bib -c '($type = "PHDTHESIS" | $type = "BOOK")' cormode.bib}}

@book{CormodeYi20,
  author    = {Cormode, Graham and Yi, Ke},
  title     = {Small Summaries for Big Data},
  year      = {2020},
  publisher = {Cambridge University Press},
  url       = {http://cormode.org/ssbd},
  abstract  = {The volume of data generated in modern applications can be massive, overwhelming our abilities to conveniently transmit, store, and index. For many scenarios, it is desirable to instead build a compact summary of a dataset that is vastly smaller. In exchange for some approximation, we obtain flexible and efficient tools that can answer a range of different types of query over the data. This book provides a comprehensive introduction to the topic data summarization, showcasing the algorithms, their behavior, and the mathematical underpinnings of their operation. The coverage starts with simple sums and approximate counts, building to more advanced probabilistic structures such as the Bloom Filter, distinct value summaries, sketches, and quantile summaries. Summaries are described for specific types of data, such as geometric data, graphs, and vectors and matrices. Throughout, examples, pseudocode and applications are given to enhance understanding.},
}

@book{CormodeGarofalakisHaasJermaine12,
  author    = {Cormode, Graham and Garofalakis, Minos and Haas, Peter and Jermaine, Chris},
  title     = {Synopses for Massive Data: Samples, Histograms, Wavelets and Sketches},
  year      = {2012},
  publisher = {Now Publishers},
  url       = {http://www.softnet.tuc.gr/~minos/Papers/fntdb12.pdf},
  abstract  = {Methods for approximate query processing are essential for dealing with massive data. They are often the only means of providing interactive response times when exploring massive datasets, and are also needed to handle high speed data streams. These methods proceed by computing a lossy, compact synopsis of the data, and then executing the query of interest against the synopsis rather than the entire data set. We describe basic principles and recent developments in approximate query processing. We focus on four key synopses: random samples, histograms, wavelets, and sketches. We consider issues such as accuracy, space and time efficiency, optimality, practicality, range of applicability, error bounds on query answers, and incremental maintenance. We also discuss the trade-offs between the different synopsis types.},
}

@book{CormodeThottan09,
  editor        = {Cormode, Graham and Thottan, Marina},
  att_authors   = {gc2602},
  att_private   = {false},
  title         = {Algorithms for Next Generation Networks},
  year          = {2010},
  publisher     = {Springer},
  link          = {http://www.springer.com/computer/communications/book/978-1-84882-764-6},
  internal-note = {NOTE(review): citation key says 09 but year is 2010 -- key left unchanged in case it is cited elsewhere; confirm which is correct},
  abstract      = {Since the early 1990s coupled with the widespread deployment of broadband to the home, we have seen remarkable progress in the ease of Internet accessibility to end users. Both commercial and private sectors rely heavily on the availability of the Internet to conduct normal day to day functions. Underpinning this exponential growth in popularity of the Internet are the advances made in the applications of basic algorithms to design and architect the Internet. The most obvious example of these algorithms is the use of search engines to collect and correlate vast amounts of information that is spread throughout the Internet. With the dawn of this new century, we are now on the verge of expanding the notion of what we mean to communicate. A new generation of netizens are poised to leverage the Internet for a myriad different applications that we have not envisioned thus far. This will require that the Internet be flexible and adapt to accommodate the requirements of next generation applications. To address this challenge, in the United States, the National Science Foundation has initiated a large research project GENI. The goal of GENI is to perform a clean-slate design for a new Internet. In particular, the aim of this project is to rethink the basic design assumptions on which the current Internet is built, with the possibility that to improve flexibility for new services we may arrive at a radically different Internet, beyond what one might imagine from evolving the current network. Given this context of internet research, the purpose of this book is to provide a comprehensive survey of present algorithms and methodologies used in the design and deployment of the Internet.
We believe that a thorough understanding of algorithms used by the Internet today is critical to develop new algorithms that will form the basis of the future Internet. The book is divided into 3 parts dealing with the application of algorithms to different aspects of network design, operations and next generation applications. Part 1 provides an algorithmic basis for the design of networks both at the physical and the service layer. This part is extensive since it considers different physical layer network technologies. The second part of this book covers two important topics of network operations and management. As we know today, network providers have already completed field trials for the 100Gbps network. It should not be long before these capacities become common place on the Internet. The challenge of processing packets at such high speeds imposes a tremendous significance on efficient and fast packet processing algorithms. Part 3 of this book discusses algorithmic techniques that form the basis of emerging applications. In this book we have attempted to provide a flavor of how algorithms have formed the basis of the Internet as we know it today. It is our hope that this book will provide a useful overview of algorithms applied to communication networks, for any student who aspires to do research in network architecture as well the application of algorithms to communication networks. We believe that for a robust design of the future Internet, it is essential that the architecture be founded on the basis of sound algorithmic principles.},
}

@book{AbelloCormode05,
  editor      = {Abello, James and Cormode, Graham},
  att_authors = {gc2602},
  att_private = {false},
  title       = {Discrete Methods in Epidemiology},
  year        = {2006},
  publisher   = {AMS},
  series      = {DIMACS},
  volume      = {70},
  url         = {../papers/discreteepidintro.pdf},
  link        = {http://www.ams.org/bookstore-getitem/item=dimacs-70},
  abstract    = {In general terms, epidemiology deals with populations rather than individuals. One of its goals is to study the frequency of occurrences of health related events. It has a major but not exclusive concern with causes and determinants of disease patterns in populations. The premise is that a systematic investigation of different populations can identify causal and preventive factors. Epidemiology is an observational rather than an experimental science. Sample questions take the form of: \begin{itemize} \item Does population exposure to $x$ increase the risk of a disease $w$? \item Are dietary supplements $\{x,y,z\}$ beneficial in lowering the risk of malady $w$? \item Do behavioral interventions reduce risk behaviors? \end{itemize} We have observed that occurrence measures, causal inference and study designs play prominent roles in the daily endeavors of a typical epidemiologist. Descriptive and analytical epidemiology are two overlapping flavors of this discipline. Descriptive epidemiology attempts to describe patterns of disease according to spatial and temporal information about the members of a population. These patterns are described by tabulations or summaries of surveys and polls or by parametric or non-parametric population models. Models are in general global descriptions of the major part of a data set. Patterns on the other hand are local features of the data that can be described by association rules, mode or gaps in density functions, outliers, inflection points in regressions, symptom clusters, geographic hot spots, etc. Some epidemiologists appear more interested in local patterns rather than in global structure.
This raises questions of how ``realistic'' certain patterns are. Analytical Epidemiology attempts to explain and predict the state of a population's health. A typical goal is to summarize the relationship between exposure and disease incidence by comparing two measures of disease frequency. These comparisons may be affected by chance, bias and by the presence or absence of an effect. This explains naturally why statistical methods play a major role in Epidemiology since bias is a central preoccupation of its practitioners. Bias means a systematic error that results in an incorrect or invalid estimate of the measure of association. This can create or mask associations. Selection and information bias are two of the main bias types. In particular, selection shall be independent of exposure if the purpose of the study is to explain the relationship between exposure and disease occurrence. In summary, one of the central themes in analytical epidemiology is to understand the roles of bias, chance and real effect in the understanding of populations health. To evaluate the role of chance, statistical hypothesis testing and estimation appear to be the tools of choice. On the other hand, generative models offer a way to describe infectious disease dynamics. Since disease patterns are of primary interest, data mining algorithms and detection of rules for pattern formation have a lot to offer. Classification and taxonomies are useful tools to develop predictive models. In general we believe that some questions addressed by epidemiologists benefit from viewing them in a mathematical and algorithmic context. This volume is a first attempt to bridge the gap between the two communities. Its main emphasis is on discrete methods that have successfully addressed some epidemiological question. We begin by providing introductory chapters, on some of the key methods from discrete data mining, by a selection of researchers in this area; and on descriptive epidemiology by Schneider.
These collect, in a digested form, what we believe are among the most potentially useful concepts in data mining and epidemiology. Next there are two chapters reporting work in epidemiology that suggest a discrete, analytical approach: Shannon on challenges in molecular data analysis, and Hirschman and Damianos on a system for monitoring news wires for indications of disease outbreaks. The remainder of the volume draws out further some of the key areas in the intersection between epidemiology and discrete methods. The technique of formal concept analysis, and the amazing depth of mathematical structure that arises from it is explored in chapters by Ozonoff, Pogel and Hannan, and Abello and Pogel. The dynamics of disease transmission can be modeled in a variety of ways, but often involves setting up systems of differential equations to model the ebb and flow of infection, as demonstrated by Desai, Boily, M\^{a}sse and Anderson, and V\'{a}zquez, in the context of quite different problems. Eubank, Kumar, Marathe, Srinivasan and Wang study massive interaction graphs and give results by a combination of combinatorial methods and simulation; Abello and Capalbo focus on properties of graphs generated by an appropriate random model; while Hartke takes a combinatorial model of disease spread on tree graphs. Finally, we see two applications of Support Vector Machines to epidemiological data sets, from Li, Muchnik and Schneider (using breast cancer data from the SEER database) and from Fradkin, Muchnik, Hermans and Morgan (using data on disease in chickens). Some other potential areas of interest that we have not touched in this collection relate to patient confidentiality, coding and cryptography and multiscale inference.},
}

@phdthesis{Cormode03,
  author      = {G. Cormode},
  att_authors = {gc2602},
  att_private = {false},
  year        = {2003},
  title       = {Sequence Distance Embeddings},
  school      = {University of Warwick},
  url         = {../papers/cormode-seqdistembed.pdf},
  abstract    = {Sequences represent a large class of fundamental objects in Computer Science - sets, strings, vectors and permutations are considered to be sequences. Distances between sequences measure their similarity, and computations based on distances are ubiquitous: either to compute the distance, or to use distance computation as part of a more complex problem. This thesis takes a very specific approach to solving questions of sequence distance: sequences are embedded into other distance measures, so that distance in the new space approximates the original distance. This allows the solution of a variety of problems including: \begin{itemize} \item Fast computation of short `sketches' in a variety of computing models, which allow sequences to be compared in constant time and space irrespective of the size of the original sequences. \item Approximate nearest neighbor and clustering problems, significantly faster than the na\"{i}ve exact solutions. \item Algorithms to find approximate occurrences of pattern sequences in long text sequences in near linear time. \item Efficient communication schemes to approximate the distance between, and exchange, sequences in close to the optimal amount of communication. \end{itemize} Solutions are given for these problems for a variety of distances, including fundamental distances on sets and vectors; distances inspired by biological problems for permutations; and certain text editing distances for strings. Many of these embeddings are computable in a streaming model where the data is too large to store in memory, and instead has to be processed as and when it arrives, piece by piece.
The embeddings are also shown to be practical, with a series of large scale experiments which demonstrate that given only a small space, approximate solutions to several similarity and clustering problems can be found that are as good as or better than those found with prior methods.},
}

@comment{{This file was generated by bibtex2html 1.92.}}