% Stanford InfoLab technical report 2002-46 by Smarr and Manning (2002).
% The month uses the predefined BibTeX macro; the percent sign in the
% abstract is escaped so it cannot start a LaTeX comment when typeset.
@techreport{ilprints554,
  author      = {Smarr, Joseph and Manning, Christopher D.},
  title       = {Classifying Unknown Proper Noun Phrases Without Context},
  type        = {Technical Report},
  number      = {2002-46},
  institution = {Stanford InfoLab},
  publisher   = {Stanford},
  month       = apr,
  year        = {2002},
  keywords    = {named-entity classification, unknown words, probabilistic modeling, n-grams},
  url         = {http://ilpubs.stanford.edu:8090/554/},
  abstract    = {We present a probabilistic generative model used to classify unknown Proper Noun Phrases into semantic categories. The core of the classifier is an n-gram character model, which is enhanced with an n-gram word-length model and a common word model. While most work has depended largely on context or domain-specific rules for semantic disambiguation of unknown names, we demonstrate that there is surprisingly reliable statistical information available in the composition of the names themselves. Using the context-independent probabilities assigned by our domain independent classifier is sufficient to achieve greater than 90\% classification accuracy on typical tasks.},
}