package gov.nih.nlm.nls.lvg.Flows; import java.util.*; import java.sql.*; import gov.nih.nlm.nls.lvg.Lib.*; import gov.nih.nlm.nls.lvg.Util.*; import gov.nih.nlm.nls.lvg.Db.*; import gov.nih.nlm.nls.lvg.Trie.*; /***************************************************************************** * This class provides features of generating derivational variants. * Derivational variants are terms which are somehow related to the original * term but do not share the same meaning. Often, the derivational variant * changes syntactic category from the original term. Derivational variants * are pre-computed and are put in Derivation table in Lvg database (facts). * Derivations can also be generated by derivation rules through Lvg trie. * *

History: * * @author NLM LVG Development Team * * @see * Design Document * * @version V-2004 ****************************************************************************/ public class ToDerivation extends Transformation implements Cloneable { // public methods /** * Performs the mutation of this flow component. * * @param in a LexItem as the input for this flow component * @param conn LVG database connection * @param trie LVG persistent trie * @param restrictFlag a numberical flag to restrict out into LVG_ONLY * LVG_OR_ALL, or ALL (defined in OutputFilter). * @param detailsFlag a boolean flag for processing details information * @param mutateFlag a boolean flag for processing mutate information * * @return the results from this flow component - a collection (Vector) * of LexItems * * @see DbBase * @see OutputFilter */ public static Vector Mutate(LexItem in, Connection conn, RamTrie trie, int restrictFlag, boolean detailsFlag, boolean mutateFlag) { // Mutate: Vector out = GetDerivations(in, conn, trie, restrictFlag, INFO, detailsFlag, mutateFlag); return out; } /** * A unit test driver for this flow component. */ public static void main(String[] args) { // load config file Configuration conf = new Configuration("data.config.lvg", true); String testStr = GetTestStr(args, "help"); // get input String int minTermLen = Integer.parseInt( conf.GetConfiguration(Configuration.MIN_TERM_LENGTH)); String lvgDir = conf.GetConfiguration(Configuration.LVG_DIR); int minTrieStemLength = Integer.parseInt( conf.GetConfiguration(Configuration.DIR_TRIE_STEM_LENGTH)); // Mutate: connect to DB LexItem in = new LexItem(testStr, Category.ALL_BIT_VALUE, Inflection.ALL_BIT_VALUE); Vector outs = new Vector(); try { Connection conn = DbBase.OpenConnection(conf); boolean isInflection = false; RamTrie trie = new RamTrie(isInflection, minTermLen, lvgDir, minTrieStemLength); if(conn != null) { outs = ToDerivation.Mutate(in, conn, trie, OutputFilter.LVG_ONLY, true, true); } DbBase.CloseConnection(conn); } catch (Exception e) { System.err.println(e.getMessage()); } PrintResults(in, outs); // print out results } // private methods /** * Get the derivational variants using both facts (database) and rules * (trie). The implementation algorithm is: *

* * @param in a LexItem as the input for this flow component * @param conn LVG database connection * @param trie LVG persistent trie * @param restrictFlag a numberical flag to restrict out into LVG_ONLY * LVG_OR_ALL, or ALL (defined in OutputFilter). * @param infoStr the header of detail information, usually is the * full name of the current flow * * @return the results from this flow component - a collection (Vector) * of LexItems * * @see DbBase * @see OutputFilter */ protected static Vector GetDerivations(LexItem in, Connection conn, RamTrie trie, int restrictFlag, String infoStr, boolean detailsFlag, boolean mutateFlag) { // init the input string and output Vector String inStr = in.GetSourceTerm(); Vector outs = new Vector(); long inCat = in.GetSourceCategory().GetValue(); long inInfl = in.GetSourceInflection().GetValue(); try { // Fact: get derivation from database Vector factList = DbDerivation.GetDerivations(inStr, conn); // update LexItems for(int i = 0; i < factList.size(); i++) { DerivationRecord record = (DerivationRecord) factList.elementAt(i); String term = record.GetTarget(); long curCat = record.GetSourceCat(); // input filter for category; // inflection is not in the database table, can't be checked if(InputFilter.IsLegal(inCat, curCat) == false) { continue; } // details & mutate String details = null; String mutate = null; if(detailsFlag == true) { details = infoStr + " (FACT)"; } if(mutateFlag == true) { mutate = "FACT" + GlobalBehavior.GetFieldSeparator() + record.GetString(GlobalBehavior.GetFieldSeparator()); } LexItem temp = UpdateLexItem(in, term, Flow.DERIVATION, record.GetTargetCat(), Inflection.GetBitValue(Inflection.BASE_BIT), details, mutate); outs.addElement(temp); } // Rule: rule generated derivations // Rule: Use trie to get the result from rule Vector ruleList = trie.GetDerivationsByRules(inStr, inCat, inInfl, true); // update LexItems for(int i = 0; i < ruleList.size(); i++) { RuleResult record = (RuleResult) ruleList.elementAt(i); String term = record.GetOutTerm(); // details & mutate String details = null; String mutate = null; if(detailsFlag == true) { details = infoStr + " (RULE|" + record.GetRuleString() + ")"; } if(mutateFlag == true) { mutate = "RULE" + GlobalBehavior.GetFieldSeparator() + record.GetRuleString() + GlobalBehavior.GetFieldSeparator(); } LexItem temp = UpdateLexItem(in, term, Flow.DERIVATION, Category.ToValue(record.GetOutCategory()), Inflection.ToValue(record.GetOutInflection()), details, mutate); outs.addElement(temp); } // Restrict the outputs outs = RestrictDerivations(outs, conn, restrictFlag); } catch (SQLException e) { System.out.println("** Error: Sql Exception in ToDerivation Flow."); } // Sort: category, length, case incentive sort LexItemComparator lc = new LexItemComparator(); lc.SetRule(LexItemComparator.LVG_RULE); Collections.sort(outs, lc); return outs; } // protected methods protected static Vector RestrictDerivations(Vector in, Connection conn, int restrictFlag) throws SQLException { Vector out = new Vector(); switch(restrictFlag) { // uninflected terms in Lvg, if no terms, return all case OutputFilter.LVG_OR_ALL: out = VerifyDerivationsFromLvg(in, conn); if(out.size() == 0) { out.addAll(in); } break; case OutputFilter.ALL: out.addAll(in); break; case OutputFilter.LVG_ONLY: default: out = VerifyDerivationsFromLvg(in, conn); break; } return out; } // return only terms which are uninflected terms in Lvg database private static Vector VerifyDerivationsFromLvg(Vector in, Connection conn) throws SQLException { Vector out = new Vector(); for(int i = 0; i < in.size(); i++) { LexItem cur = (LexItem) in.elementAt(i); String derivation = cur.GetTargetTerm(); if(DbUninflection.IsExistUninflectedTerm(derivation, conn) == true) { out.addElement(cur); } } return out; } // data members private static final String INFO = "Derivation"; }