% Stanford InfoLab technical report 2005-33 on link spam detection via spam mass estimation.
% Notes for maintainers:
%   - month uses the predefined macro (oct) so the bibliography style controls its rendering.
%   - publisher is not a standard techreport field; classic styles ignore it. It is retained
%     from the repository export -- NOTE(review): confirm whether it should become address.
@techreport{ilprints697,
  author      = {Gyongyi, Zoltan and Berkhin, Pavel and Garcia-Molina, Hector and Pedersen, Jan},
  title       = {Link Spam Detection Based on Mass Estimation},
  type        = {Technical Report},
  number      = {2005-33},
  institution = {Stanford InfoLab},
  publisher   = {Stanford},
  month       = oct,
  year        = {2005},
  keywords    = {web search; link spam detection},
  url         = {http://ilpubs.stanford.edu:8090/697/},
  abstract    = {Link spamming intends to mislead search engines and trigger an artificially high link-based ranking of specific target web pages. This paper introduces the concept of spam mass, a measure of the impact of link spamming on a page's ranking. We discuss how to estimate spam mass and how the estimates can help identifying pages that benefit significantly from link spamming. In our experiments on the host-level Yahoo! web graph we use spam mass estimates to successfully identify tens of thousands of instances of heavy-weight link spamming.},
}