% Stanford InfoLab technical report 1999-65 by Sergey Brin (record 421 at the URL below).
% The workshop venue that the exported record stored in a journal field is kept
% in the note field, because report entries do not print a journal field.
% The duplicate publisher field was dropped: it repeated the institution verbatim.
@techreport{ilprints421,
  author      = {Brin, Sergey},
  title       = {Extracting Patterns and Relations from the {World Wide Web}},
  institution = {Stanford InfoLab},
  type        = {Technical Report},
  number      = {1999-65},
  month       = nov,
  year        = {1999},
  note        = {Previous number = SIDL-WP-1999-0119. Also in WebDB Workshop at EDBT'98},
  url         = {http://ilpubs.stanford.edu:8090/421/},
  abstract    = {The World Wide Web is a vast resource for information. At the same time it is extremely distributed. A particular type of data such as restaurant lists may be scattered across thousands of independent information sources in many different formats. In this paper, we consider the problem of extracting a relation for such a data type from all of these sources automatically. We present a technique which exploits the duality between sets of patterns and relations to grow the target relation starting from a small sample. To test our technique we use it to extract a relation of (author, title) pairs from the World Wide Web.},
}