|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  edu.stanford.nlp.parser.lexparser.BaseLexicon
public class BaseLexicon
This is the default concrete instantiation of the Lexicon interface. It was originally built for Penn Treebank English.
Field Summary | |
---|---|
protected int |
lastSentencePosition
|
protected int |
lastSignatureIndex
We cache the last signature looked up, because it asks for the same one many times when an unknown word is encountered! (Note that under the current scheme, one unknown word, if seen sentence-initially and non-initially, will be parsed with two different signatures....) |
protected int |
lastWordToSignaturize
|
protected static short |
nullTag
|
protected static int |
nullWord
|
protected java.util.Set |
rules
|
protected java.util.List[] |
rulesWithWord
|
protected Counter |
seenCounter
|
protected java.util.Set |
sigs
|
protected boolean |
smartMutation
|
protected int |
smoothInUnknownsThreshold
|
protected java.util.Set |
tags
|
protected int |
unknownLevel
|
protected Counter |
unSeenCounter
|
protected java.util.Set |
words
|
Fields inherited from interface edu.stanford.nlp.parser.lexparser.Lexicon |
---|
BOUNDARY, BOUNDARY_TAG, UNKNOWN_WORD |
Constructor Summary | |
---|---|
BaseLexicon()
|
|
BaseLexicon(edu.stanford.nlp.parser.lexparser.Options.LexOptions op)
|
Method Summary | |
---|---|
protected void |
addTagging(boolean seen,
IntTaggedWord itw,
double count)
Adds the tagging with count to the data structures in this Lexicon. |
double |
evaluateCoverage(java.util.Collection trees,
java.util.Set missingWords,
java.util.Set missingTags,
java.util.Set missingTW)
Evaluates how many words (= terminals) in a collection of trees are covered by the lexicon. |
protected java.lang.String |
getSignature(java.lang.String word,
int loc)
This routine returns a String that is the "signature" of the class of a word. |
protected int |
getSignatureIndex(int wordIndex,
int sentencePosition)
Returns the index of the signature of the word numbered wordIndex, where the signature is the String representation of unknown word features. |
protected void |
initRulesWithWord()
|
boolean |
isKnown(int word)
Checks whether a word is in the lexicon. |
boolean |
isKnown(java.lang.String word)
Checks whether a word is in the lexicon. |
void |
printLexStats()
|
void |
readData(java.io.BufferedReader in)
Populates data in this Lexicon from the character stream given by the BufferedReader in. |
java.util.Iterator |
ruleIteratorByWord(int word,
int loc)
Get an iterator over all rules with this word and loc. |
double |
score(IntTaggedWord iTW,
int loc)
Get the score of this word with this tag (as an IntTaggedWord) at this loc (presumably an estimate of P(word | tag)). |
void |
train(java.util.Collection trees)
Trains this lexicon on the Collection of trees. |
protected java.util.List |
treeToEvents(Tree tree)
|
void |
tune(java.util.Collection trees)
|
void |
writeData(java.io.Writer w)
Writes out data from this Object to the Writer w. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected int unknownLevel
protected int smoothInUnknownsThreshold
protected boolean smartMutation
protected transient java.util.List[] rulesWithWord
protected transient java.util.Set rules
protected transient java.util.Set tags
protected transient java.util.Set words
protected transient java.util.Set sigs
protected Counter seenCounter
protected Counter unSeenCounter
protected static final int nullWord
protected static final short nullTag
protected transient int lastSignatureIndex
protected transient int lastSentencePosition
protected transient int lastWordToSignaturize
Constructor Detail |
---|
public BaseLexicon()
public BaseLexicon(edu.stanford.nlp.parser.lexparser.Options.LexOptions op)
Method Detail |
---|
public boolean isKnown(int word)
Lexicon
isKnown
in interface Lexicon
word
- The word as an int
public boolean isKnown(java.lang.String word)
isKnown
in interface Lexicon
word
- The word as a String
public java.util.Iterator ruleIteratorByWord(int word, int loc)
Lexicon
ruleIteratorByWord
in interface Lexicon
protected void initRulesWithWord()
protected java.util.List treeToEvents(Tree tree)
public void train(java.util.Collection trees)
train
in interface Lexicon
protected void addTagging(boolean seen, IntTaggedWord itw, double count)
protected int getSignatureIndex(int wordIndex, int sentencePosition)
protected java.lang.String getSignature(java.lang.String word, int loc)
word
- The word to make a signature for
loc
- Its position in the sentence (mainly so sentence-initial
capitalized words can be treated differently)
public double score(IntTaggedWord iTW, int loc)
Lexicon
score
in interface Lexicon
public void tune(java.util.Collection trees)
public void readData(java.io.BufferedReader in) throws java.io.IOException
readData
in interface Lexicon
java.io.IOException
public void writeData(java.io.Writer w) throws java.io.IOException
writeData
in interface Lexicon
w
- the writer to output to
java.io.IOException
public void printLexStats()
public double evaluateCoverage(java.util.Collection trees, java.util.Set missingWords, java.util.Set missingTags, java.util.Set missingTW)
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |