% Stanford InfoLab technical report 2002-10 (Klein, Kamvar, Manning; 2002).
% Names use the unambiguous "Last, First" form and the month uses the
% standard three-letter macro so that the bibliography style controls rendering.
@techreport{ilprints528,
  author      = {Klein, Dan and Kamvar, Sepandar D. and Manning, Christopher D.},
  title       = {From Instance-level Constraints to Space-Level Constraints: Making the Most of Prior Knowledge in Data Clustering},
  type        = {Technical Report},
  number      = {2002-10},
  institution = {Stanford InfoLab},
  publisher   = {Stanford},
  month       = feb,
  year        = {2002},
  url         = {http://ilpubs.stanford.edu:8090/528/},
  keywords    = {clustering, constrained clustering, prior knowledge},
  abstract    = {We present an improved method for clustering in the presence of very limited supervisory information, given as pairwise instance constraints. By allowing instance-level constraints to have space-level inductive implications, we are able to successfully incorporate constraints for a wide range of data set types. Our method greatly improves on the previously studied constrained k-means algorithm, generally requiring less than half as many constraints to achieve a given accuracy on a range of real-world data, while also being more robust when over-constrained. We additionally discuss an active learning algorithm which increases the value of constraints even further.},
}