edu.umass.cs.mallet.projects.seg_plus_coref.coreference
Class CitationUtils

java.lang.Object
  extended by edu.umass.cs.mallet.projects.seg_plus_coref.coreference.CitationUtils

public class CitationUtils
extends java.lang.Object


Field Summary
static java.lang.String AUTHOR
           
static java.lang.String PAPER
           
static java.lang.String[] SEPERATOR
           
static java.lang.String VENUE
           
 
Constructor Summary
CitationUtils()
           
 
Method Summary
static void addAuthorsFromLine(java.lang.String str, java.util.ArrayList nodes, IEInterface ieInterface, boolean useCRFLocal, int numNBest, int nthViterbi)
           
static void addPaperFromLine(java.lang.String str, java.util.ArrayList nodes, IEInterface ieInterface, boolean useCRFLocal, int numNBest, int nthViterbi)
           
static void addVenuesFromLine(java.lang.String str, java.util.ArrayList nodes, IEInterface ieInterface, boolean useCRFLocal, int numNBest, int nthViterbi)
           
static com.wcohen.secondstring.StringDistance computeDistanceMetric(java.util.ArrayList nodes)
           
static java.util.ArrayList computeNodes(java.util.ArrayList trainFileArray, IEInterface ieInterface, boolean useCRFLocal, int numNBest, int nthViterbi)
           
static java.util.ArrayList computeNodes(java.util.ArrayList trainFileArray, IEInterface ieInterface, boolean useCRFLocal, int numNBest, int nthViterbi, java.lang.String nodeType)
          Build nodes with labels corresponding to cluster ids.
protected static java.util.ArrayList computeNodesWPubs(java.util.ArrayList trainFileArray, java.util.ArrayList publications, IEInterface ieInterface, boolean useCRFLocal, int numNBest, int nthViterbi)
           
static java.util.ArrayList computeNodesWPubs(java.util.ArrayList trainFileArray, java.util.ArrayList publications, IEInterface ieInterface, int numNBest, int nthViterbi)
           
static void evaluateClustering(java.util.Collection key, java.util.Collection pred, java.lang.String description)
           
static java.util.Collection makeCollections(java.util.ArrayList nodes)
           
static void makeDistMetric(java.util.List list, com.wcohen.secondstring.StringDistance tfidf, com.wcohen.secondstring.StringDistance triGramDistanceMetric)
           
static InstanceList makePairs(Pipe instancePipe, java.util.ArrayList nodes)
           
static InstanceList makePairs(Pipe instancePipe, java.util.ArrayList nodes, double negativeProb)
           
static InstanceList makePairs(Pipe instancePipe, java.util.ArrayList nodes, java.util.List pairs)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

PAPER

public static final java.lang.String PAPER
See Also:
Constant Field Values

VENUE

public static final java.lang.String VENUE
See Also:
Constant Field Values

AUTHOR

public static final java.lang.String AUTHOR
See Also:
Constant Field Values

SEPERATOR

public static java.lang.String[] SEPERATOR
Constructor Detail

CitationUtils

public CitationUtils()
Method Detail

computeNodes

public static java.util.ArrayList computeNodes(java.util.ArrayList trainFileArray,
                                               IEInterface ieInterface,
                                               boolean useCRFLocal,
                                               int numNBest,
                                               int nthViterbi)

addPaperFromLine

public static void addPaperFromLine(java.lang.String str,
                                    java.util.ArrayList nodes,
                                    IEInterface ieInterface,
                                    boolean useCRFLocal,
                                    int numNBest,
                                    int nthViterbi)

addVenuesFromLine

public static void addVenuesFromLine(java.lang.String str,
                                     java.util.ArrayList nodes,
                                     IEInterface ieInterface,
                                     boolean useCRFLocal,
                                     int numNBest,
                                     int nthViterbi)

addAuthorsFromLine

public static void addAuthorsFromLine(java.lang.String str,
                                      java.util.ArrayList nodes,
                                      IEInterface ieInterface,
                                      boolean useCRFLocal,
                                      int numNBest,
                                      int nthViterbi)

computeNodes

public static java.util.ArrayList computeNodes(java.util.ArrayList trainFileArray,
                                               IEInterface ieInterface,
                                               boolean useCRFLocal,
                                               int numNBest,
                                               int nthViterbi,
                                               java.lang.String nodeType)
Build nodes with labels corresponding to cluster ids. Passing different strings for nodeType makes each node correspond to a paper, an author, a venue, etc. The default is paper.

Parameters:
trainFileArray - list of citation files
ieInterface - the interface to the extraction
useCRFLocal - whether to use a CRF for segmentation
numNBest - number of Viterbi paths to use
nodeType - type of node to create (paper, author, venue...)

computeNodesWPubs

public static java.util.ArrayList computeNodesWPubs(java.util.ArrayList trainFileArray,
                                                    java.util.ArrayList publications,
                                                    IEInterface ieInterface,
                                                    int numNBest,
                                                    int nthViterbi)

computeNodesWPubs

protected static java.util.ArrayList computeNodesWPubs(java.util.ArrayList trainFileArray,
                                                       java.util.ArrayList publications,
                                                       IEInterface ieInterface,
                                                       boolean useCRFLocal,
                                                       int numNBest,
                                                       int nthViterbi)

computeDistanceMetric

public static com.wcohen.secondstring.StringDistance computeDistanceMetric(java.util.ArrayList nodes)

makeDistMetric

public static void makeDistMetric(java.util.List list,
                                  com.wcohen.secondstring.StringDistance tfidf,
                                  com.wcohen.secondstring.StringDistance triGramDistanceMetric)

makePairs

public static InstanceList makePairs(Pipe instancePipe,
                                     java.util.ArrayList nodes)

makePairs

public static InstanceList makePairs(Pipe instancePipe,
                                     java.util.ArrayList nodes,
                                     double negativeProb)
Parameters:
instancePipe - to pipe instances through
nodes - Citation nodes
negativeProb - the probability of including each negative instance (to reduce class disparity)

makePairs

public static InstanceList makePairs(Pipe instancePipe,
                                     java.util.ArrayList nodes,
                                     java.util.List pairs)

makeCollections

public static java.util.Collection makeCollections(java.util.ArrayList nodes)

evaluateClustering

public static void evaluateClustering(java.util.Collection key,
                                      java.util.Collection pred,
                                      java.lang.String description)