package gov.nih.nlm.nls.lvg.Flows;
import java.util.*;
import java.sql.*;
import gov.nih.nlm.nls.lvg.Lib.*;
import gov.nih.nlm.nls.lvg.Util.*;
import gov.nih.nlm.nls.lvg.Db.*;
import gov.nih.nlm.nls.lvg.Trie.*;
/*****************************************************************************
* This class provides features of generating derivational variants.
* Derivational variants are terms which are somehow related to the original
* term but do not share the same meaning. Often, the derivational variant
* changes syntactic category from the original term. Derivational variants
* are pre-computed and are put in the Derivation table in the Lvg database
* (facts). Derivations can also be generated by derivation rules through the
* Lvg trie.
*
* History:
*
* @author NLM LVG Development Team
*
* @see "Design Document"
*
* @version V-2004
****************************************************************************/
public class ToDerivation extends Transformation implements Cloneable
{
    // public methods
    /**
    * Performs the mutation of this flow component. This is a thin wrapper
    * that delegates to GetDerivations() using this flow's own information
    * header.
    *
    * @param   in   a LexItem as the input for this flow component
    * @param   conn   LVG database connection
    * @param   trie   LVG persistent trie
    * @param   restrictFlag   a numerical flag to restrict output into
    * LVG_ONLY, LVG_OR_ALL, or ALL (defined in OutputFilter).
    * @param   detailsFlag   a boolean flag for processing details information
    * @param   mutateFlag   a boolean flag for processing mutate information
    *
    * @return  the results from this flow component - a collection (Vector)
    * of LexItems
    *
    * @see DbBase
    * @see OutputFilter
    */
    public static Vector Mutate(LexItem in, Connection conn, RamTrie trie,
        int restrictFlag, boolean detailsFlag, boolean mutateFlag)
    {
        return GetDerivations(in, conn, trie, restrictFlag, INFO,
            detailsFlag, mutateFlag);
    }

    /**
    * A unit test driver for this flow component. It reads the LVG
    * configuration, opens a database connection, builds a derivation trie,
    * runs Mutate() restricted to LVG_ONLY with details and mutate
    * information enabled, and prints the results.
    *
    * @param   args   command line arguments; the test string is obtained
    * through GetTestStr(args, "help")
    */
    public static void main(String[] args)
    {
        // load config file
        Configuration conf = new Configuration("data.config.lvg", true);
        String testStr = GetTestStr(args, "help");    // get input String
        int minTermLen = Integer.parseInt(
            conf.GetConfiguration(Configuration.MIN_TERM_LENGTH));
        String lvgDir = conf.GetConfiguration(Configuration.LVG_DIR);
        int minTrieStemLength = Integer.parseInt(
            conf.GetConfiguration(Configuration.DIR_TRIE_STEM_LENGTH));

        // Mutate: connect to DB
        LexItem in = new LexItem(testStr, Category.ALL_BIT_VALUE,
            Inflection.ALL_BIT_VALUE);
        Vector outs = new Vector();

        try
        {
            Connection conn = DbBase.OpenConnection(conf);
            try
            {
                boolean isInflection = false;    // derivation, not inflection
                RamTrie trie = new RamTrie(isInflection, minTermLen, lvgDir,
                    minTrieStemLength);

                if(conn != null)
                {
                    outs = ToDerivation.Mutate(in, conn, trie,
                        OutputFilter.LVG_ONLY, true, true);
                }
            }
            finally
            {
                // Always release the connection, even when building the trie
                // or running the flow throws; previously such a failure
                // skipped the close call and leaked the connection.
                DbBase.CloseConnection(conn);
            }
        }
        catch (Exception e)
        {
            System.err.println(e.getMessage());
        }

        PrintResults(in, outs);     // print out results
    }

    // protected methods
    /**
    * Get the derivational variants using both facts (database) and rules
    * (trie). The implementation algorithm is:
    * <ul>
    * <li>Facts:
    *   <ul>
    *   <li>Retrieves the derivation records of the input term through
    *       DbDerivation.GetDerivations(). (The original design notes describe
    *       this as a case insensitive search of the input term against both
    *       term1 and term2 in the derivation table.)
    *   <li>Skips records whose source category is not legal for the input
    *       category (InputFilter.IsLegal). Inflection is not stored in the
    *       database table, so it can't be checked.
    *   <li>Assigns term and category for both source and target; the
    *       target inflection of a fact is always the base form.
    *   </ul>
    * <li>Rules:
    *   <ul>
    *   <li>Uses persistent trie to apply rules (and check exceptions) on
    *       the input term.
    *   <li>Assigns term, category, and inflection for the target from the
    *       rule result.
    *   </ul>
    * <li>Restricts results according to the restriction filter.
    * <li>Sorts the output with LexItemComparator using LVG_RULE.
    * </ul>
    *
    * If a SQLException occurs, an error message is printed and whatever
    * has been collected so far is sorted and returned. NOTE(review): when
    * the failure happens inside the restriction step, the returned items
    * are the unrestricted ones - confirm this is the intended fallback.
    *
    * @param   in   a LexItem as the input for this flow component
    * @param   conn   LVG database connection
    * @param   trie   LVG persistent trie
    * @param   restrictFlag   a numerical flag to restrict output into
    * LVG_ONLY, LVG_OR_ALL, or ALL (defined in OutputFilter).
    * @param   infoStr   the header of detail information, usually is the
    * full name of the current flow
    * @param   detailsFlag   a boolean flag for processing details
    * information; when false, the details passed to the LexItem are null
    * @param   mutateFlag   a boolean flag for processing mutate information;
    * when false, the mutate information passed to the LexItem is null
    *
    * @return  the results from this flow component - a collection (Vector)
    * of LexItems
    *
    * @see DbBase
    * @see OutputFilter
    */
    protected static Vector GetDerivations(LexItem in, Connection conn,
        RamTrie trie, int restrictFlag, String infoStr, boolean detailsFlag,
        boolean mutateFlag)
    {
        // init the input string and output Vector
        String inStr = in.GetSourceTerm();
        Vector outs = new Vector();
        long inCat = in.GetSourceCategory().GetValue();
        long inInfl = in.GetSourceInflection().GetValue();

        try
        {
            // Fact: get derivation from database
            Vector factList = DbDerivation.GetDerivations(inStr, conn);

            // update LexItems
            for(int i = 0; i < factList.size(); i++)
            {
                DerivationRecord record =
                    (DerivationRecord) factList.elementAt(i);
                String term = record.GetTarget();
                long curCat = record.GetSourceCat();

                // input filter for category;
                // inflection is not in the database table, can't be checked
                if(!InputFilter.IsLegal(inCat, curCat))
                {
                    continue;
                }

                // details & mutate
                String details = null;
                String mutate = null;
                if(detailsFlag)
                {
                    details = infoStr + " (FACT)";
                }
                if(mutateFlag)
                {
                    mutate = "FACT" + GlobalBehavior.GetFieldSeparator() +
                        record.GetString(GlobalBehavior.GetFieldSeparator());
                }

                // facts carry no inflection: target is always the base form
                LexItem temp = UpdateLexItem(in, term, Flow.DERIVATION,
                    record.GetTargetCat(),
                    Inflection.GetBitValue(Inflection.BASE_BIT),
                    details, mutate);

                outs.addElement(temp);
            }

            // Rule: use trie to get the rule generated derivations
            Vector ruleList =
                trie.GetDerivationsByRules(inStr, inCat, inInfl, true);

            // update LexItems
            for(int i = 0; i < ruleList.size(); i++)
            {
                RuleResult record = (RuleResult) ruleList.elementAt(i);
                String term = record.GetOutTerm();

                // details & mutate
                String details = null;
                String mutate = null;
                if(detailsFlag)
                {
                    details = infoStr + " (RULE|" + record.GetRuleString()
                        + ")";
                }
                if(mutateFlag)
                {
                    mutate = "RULE" + GlobalBehavior.GetFieldSeparator()
                        + record.GetRuleString()
                        + GlobalBehavior.GetFieldSeparator();
                }

                LexItem temp = UpdateLexItem(in, term, Flow.DERIVATION,
                    Category.ToValue(record.GetOutCategory()),
                    Inflection.ToValue(record.GetOutInflection()),
                    details, mutate);

                outs.addElement(temp);
            }

            // Restrict the outputs
            outs = RestrictDerivations(outs, conn, restrictFlag);
        }
        catch (SQLException e)
        {
            System.out.println("** Error: Sql Exception in ToDerivation Flow.");
            // Also report the underlying cause so the failure can be
            // diagnosed; previously the exception details were dropped.
            System.err.println(e.getMessage());
        }

        // Sort: category, length, case insensitive sort
        LexItemComparator lc = new LexItemComparator();
        lc.SetRule(LexItemComparator.LVG_RULE);
        Collections.sort(outs, lc);

        return outs;
    }

    /**
    * Restricts derivations according to the restriction flag:
    * <ul>
    * <li>LVG_OR_ALL: only the derivations verified by
    *     VerifyDerivationsFromLvg(); if none is verified, all inputs.
    * <li>ALL: all inputs.
    * <li>LVG_ONLY (and any other value): only the derivations verified by
    *     VerifyDerivationsFromLvg().
    * </ul>
    * The input Vector is never modified; a new Vector is always returned.
    *
    * @param   in   a Vector of LexItems to be restricted
    * @param   conn   LVG database connection
    * @param   restrictFlag   a numerical flag to restrict output into
    * LVG_ONLY, LVG_OR_ALL, or ALL (defined in OutputFilter).
    *
    * @return  a new Vector of the LexItems that pass the restriction
    *
    * @exception SQLException if the database verification fails
    *
    * @see OutputFilter
    */
    protected static Vector RestrictDerivations(Vector in, Connection conn,
        int restrictFlag) throws SQLException
    {
        Vector out;

        switch(restrictFlag)
        {
            // uninflected terms in Lvg, if no terms, return all
            case OutputFilter.LVG_OR_ALL:
                out = VerifyDerivationsFromLvg(in, conn);
                if(out.isEmpty())
                {
                    out.addAll(in);
                }
                break;
            case OutputFilter.ALL:
                out = new Vector(in);
                break;
            case OutputFilter.LVG_ONLY:
            default:
                out = VerifyDerivationsFromLvg(in, conn);
                break;
        }

        return out;
    }

    // private methods
    /**
    * Returns only the LexItems whose target term is reported by
    * DbUninflection.IsExistUninflectedTerm() as an uninflected term in the
    * Lvg database. The relative order of the kept items is preserved.
    *
    * @param   in   a Vector of LexItems to be verified
    * @param   conn   LVG database connection
    *
    * @return  a new Vector with the verified LexItems
    *
    * @exception SQLException if the database lookup fails
    */
    private static Vector VerifyDerivationsFromLvg(Vector in, Connection conn)
        throws SQLException
    {
        Vector out = new Vector();

        for(int i = 0; i < in.size(); i++)
        {
            LexItem cur = (LexItem) in.elementAt(i);
            String derivation = cur.GetTargetTerm();

            if(DbUninflection.IsExistUninflectedTerm(derivation, conn))
            {
                out.addElement(cur);
            }
        }

        return out;
    }

    // data members
    /** Header used for the details information of this flow. */
    private static final String INFO = "Derivation";
}