public class DocumentTokenizer extends Object
Constructor and Description |
---|
DocumentTokenizer(TermDatabase tdb) |
Modifier and Type | Method and Description |
---|---|
void | addTermNormalizer(TermNormalizerIF normalizer) |
TermDatabase | getTermDatabase() |
void | setDelimiterTrimmer(DelimiterTrimmerIF trimmer) |
void | setTermDatabase(TermDatabase tdb) |
void | setTokenizer(TokenizerIF tokenizer) |
void | tokenize(Document doc) |
protected void | tokenize(Region region) |
protected void | tokenize(Region parent, TextBlock tb) |
protected void | tokenize(TextBlock tb, String token) |
public DocumentTokenizer(TermDatabase tdb)
public TermDatabase getTermDatabase()
public void setTermDatabase(TermDatabase tdb)
public void setTokenizer(TokenizerIF tokenizer)
public void setDelimiterTrimmer(DelimiterTrimmerIF trimmer)
public void addTermNormalizer(TermNormalizerIF normalizer)
public void tokenize(Document doc)
protected void tokenize(Region region)