% Stanford InfoLab technical report 2006-14 (overview of the SERF project).
% NOTE(review): `journal` and `publisher` are not used by the standard
% techreport styles; they are kept because they record the IEEE Data
% Engineering Bulletin venue -- confirm whether a `note` field is wanted instead.
@techreport{ilprints779,
  author      = {Benjelloun, Omar and Garcia-Molina, Hector and Kawai, Hideki and Larson, Tait Eliott and Menestrina, David and Su, Qi and Thavisomboon, Sutthipong and Widom, Jennifer},
  title       = {Generic Entity Resolution in the {SERF} Project},
  type        = {Technical Report},
  number      = {2006-14},
  institution = {Stanford InfoLab},
  publisher   = {Stanford InfoLab},
  journal     = {IEEE Data Engineering Bulletin, June 2006 Issue},
  month       = jun,
  year        = {2006},
  keywords    = {Data Cleaning, Generic Entity Resolution, Record Linkage, Deduplication},
  url         = {http://ilpubs.stanford.edu:8090/779/},
  abstract    = {The SERF project at Stanford deals with the Entity Resolution (ER) problem, in which records determined to represent the same real-life ``entities'' (such as people or products) are successively located and combined. The approach we pursue is ``generic'', in the sense that the specific functions used to match and merge records are viewed as black boxes, which permits efficient, expressive and extensible ER solutions. This paper motivates and introduces the principles of generic ER, and gives an overview of the research directions we have been exploring in the SERF project over the past two years.},
}