edu.umass.cs.mallet.projects.seg_plus_coref.coreference
Class CitationNormalizer
java.lang.Object
edu.umass.cs.mallet.projects.seg_plus_coref.coreference.CitationNormalizer
public class CitationNormalizer
extends java.lang.Object
Created by IntelliJ IDEA.
User: michaelhay
Date: Feb 17, 2004
Time: 8:36:50 PM
To change this template use Options | File Templates.
Field Summary
static java.lang.String[] STOP_WORDS
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
STOP_WORDS
public static final java.lang.String[] STOP_WORDS
CitationNormalizer
public CitationNormalizer()
getTokens
public java.util.List getTokens(java.lang.String s)
getTokensAsSet
public java.util.Set getTokensAsSet(java.lang.String s)
norm1
public java.lang.String norm1(java.lang.String s)
authorNorm
public java.lang.String authorNorm(java.lang.String a)
norm
public java.lang.String norm(java.lang.String s)
norm2
public java.lang.String norm2(java.lang.String s)
getFourDigitString
public java.lang.String getFourDigitString(java.lang.String s)
makeSubstitutions
public java.util.List makeSubstitutions(java.util.List tokens)
makeSubstitutions
public java.util.List makeSubstitutions(java.util.List tokens, java.util.Map subMap)
removeStopWords
public java.util.List removeStopWords(java.util.List tokens)
getNumericOnly
public java.lang.String getNumericOnly(java.lang.String s)
getAlphaOnly
public java.lang.String getAlphaOnly(java.lang.String s)