% Stanford InfoLab technical report 2002-11 (Kamvar, Klein, Manning, 2002).
% Note: `publisher` is not a standard techreport field and is ignored by the
% standard styles; it is retained here so no exported metadata is lost.
@techreport{ilprints529,
  author      = {Kamvar, Sepandar D. and Klein, Dan and Manning, Christopher D.},
  title       = {Interpreting and Extending Classical Agglomerative Clustering Algorithms using a Model-Based Approach},
  type        = {Technical Report},
  number      = {2002-11},
  institution = {Stanford InfoLab},
  publisher   = {Stanford},
  month       = feb,
  year        = {2002},
  keywords    = {clustering, probabilistic models, model-based clustering, hierarchical clustering},
  url         = {http://ilpubs.stanford.edu:8090/529/},
  abstract    = {We present two results which arise from a model-based approach to hierarchical agglomerative clustering. First, we show formally that the common heuristic agglomerative clustering algorithms -- single-link, complete-link, group-average, and Ward's method -- are each equivalent to a hierarchical model-based method. This interpretation gives a theoretical explanation of the empirical behavior of these algorithms, as well as a principled approach to resolving practical issues, such as number of clusters or the choice of method. Second, we show how a model-based approach can be used to extend these basic agglomerative algorithms. We introduce adjusted complete-link, Mahalanobis-link, and line-link as variants of the classical agglomerative methods, and demonstrate their utility.},
}