00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <ncbi_pch.hpp>
00033 #include "file_loader.hpp"
00034
00035 #include <corelib/ncbifile.hpp>
00036
00037 #include <gui/core/plugin_registry.hpp>
00038 #include <gui/core/plugin_utils.hpp>
00039 #include <gui/core/version.hpp>
00040 #include <gui/objutils/obj_convert.hpp>
00041 #include <gui/objects/ProjectItem.hpp>
00042 #include <gui/objects/PluginInfoSet.hpp>
00043 #include <gui/objects/PluginValueConstraint.hpp>
00044
00045 #include <util/xregexp/regexp.hpp>
00046 #include <util/static_map.hpp>
00047 #include <algo/phy_tree/phy_node.hpp>
00048 #include <objtools/readers/aln_reader.hpp>
00049 #include <objtools/readers/rm_reader.hpp>
00050
00051 #include "file_type_chooser_dlg.hpp"
00052
00053 BEGIN_NCBI_SCOPE
00054 USING_SCOPE(objects);
00055
00056
00057 typedef pair<const char*, CDataPlugin_FileImport::EFormat> TFormatPair;
00058 static const TFormatPair sc_FormatArray[] = {
00059 TFormatPair("AGP format assembly", CDataPlugin_FileImport::eFormat_AGP),
00060 TFormatPair("Autodetect", CDataPlugin_FileImport::eFormat_Autodetect),
00061 TFormatPair("Distance Matrix", CDataPlugin_FileImport::eFormat_DistanceMatrix),
00062 TFormatPair("FASTA sequence", CDataPlugin_FileImport::eFormat_FastA),
00063 TFormatPair("Five-Column Feature Table", CDataPlugin_FileImport::eFormat_FiveColumn),
00064 TFormatPair("GFF\\/GTF format annotations", CDataPlugin_FileImport::eFormat_GTF),
00065 TFormatPair("Glimmer3 Predictions", CDataPlugin_FileImport::eFormat_Glimmer3),
00066 TFormatPair("NCBI Binary ASN.1", CDataPlugin_FileImport::eFormat_AsnBinary),
00067 TFormatPair("NCBI Text ASN.1", CDataPlugin_FileImport::eFormat_AsnText),
00068 TFormatPair("NCBI XML", CDataPlugin_FileImport::eFormat_XML),
00069 TFormatPair("Newick-format Phylogenetic Tree", CDataPlugin_FileImport::eFormat_NewickTree),
00070 TFormatPair("Phrap\\/ACE-format assembly", CDataPlugin_FileImport::eFormat_Phrap),
00071 TFormatPair("RepeatMasker OUT", CDataPlugin_FileImport::eFormat_RepeatMasker),
00072 TFormatPair("SNP Markers", CDataPlugin_FileImport::eFormat_SNPMarkers),
00073 TFormatPair("Table", CDataPlugin_FileImport::eFormat_Table),
00074 TFormatPair("Taxplot File", CDataPlugin_FileImport::eFormat_Taxplot),
00075 TFormatPair("Text alignment", CDataPlugin_FileImport::eFormat_TextAlign),
00076 };
00077 typedef CStaticArrayMap<const char*, CDataPlugin_FileImport::EFormat, PCase_CStr> TFormatMap;
00078 DEFINE_STATIC_ARRAY_MAP(TFormatMap, sc_Formats, sc_FormatArray);
00079
00080
00081 static const TFormatPair sc_SynFormatArray[] = {
00082 TFormatPair("agp", CDataPlugin_FileImport::eFormat_AGP),
00083 TFormatPair("asnbin", CDataPlugin_FileImport::eFormat_AsnBinary),
00084 TFormatPair("asntext", CDataPlugin_FileImport::eFormat_AsnText),
00085 TFormatPair("auto", CDataPlugin_FileImport::eFormat_Autodetect),
00086 TFormatPair("dist", CDataPlugin_FileImport::eFormat_DistanceMatrix),
00087 TFormatPair("fasta", CDataPlugin_FileImport::eFormat_FastA),
00088 TFormatPair("fivecol", CDataPlugin_FileImport::eFormat_FiveColumn),
00089 TFormatPair("glimmer", CDataPlugin_FileImport::eFormat_Glimmer3),
00090 TFormatPair("gtf", CDataPlugin_FileImport::eFormat_GTF),
00091 TFormatPair("newick", CDataPlugin_FileImport::eFormat_NewickTree),
00092 TFormatPair("phrap", CDataPlugin_FileImport::eFormat_Phrap),
00093 TFormatPair("rmo", CDataPlugin_FileImport::eFormat_RepeatMasker),
00094 TFormatPair("smk", CDataPlugin_FileImport::eFormat_SNPMarkers),
00095 TFormatPair("table", CDataPlugin_FileImport::eFormat_Table),
00096 TFormatPair("taxplot", CDataPlugin_FileImport::eFormat_Taxplot),
00097 TFormatPair("textalign", CDataPlugin_FileImport::eFormat_TextAlign),
00098 TFormatPair("xml", CDataPlugin_FileImport::eFormat_XML)
00099 };
00100 DEFINE_STATIC_ARRAY_MAP(TFormatMap, sc_SynFormats, sc_SynFormatArray);
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144 static CDataPlugin_FileImport::EFormat s_GetFormatFromString(const string& fmt_str)
00145 {
00146 CDataPlugin_FileImport::EFormat fmt = CDataPlugin_FileImport::eFormat_Unknown;
00147 TFormatMap::const_iterator iter = sc_Formats.find(fmt_str.c_str());
00148 if (iter != sc_Formats.end()) {
00149 fmt = iter->second;
00150 } else {
00151 iter = sc_SynFormats.find(fmt_str.c_str());
00152 if (iter != sc_SynFormats.end()) {
00153 fmt = iter->second;
00154 }
00155 }
00156 return fmt;
00157 }
00158
00159 static string
00160 s_GetStringFromFormat(CDataPlugin_FileImport::EFormat fmt)
00161 {
00162 ITERATE (TFormatMap, iter, sc_Formats) {
00163 if (iter->second == fmt) {
00164 return string(iter->first);
00165 }
00166 }
00167
00168 ITERATE (TFormatMap, iter, sc_SynFormats) {
00169 if (iter->second == fmt) {
00170 return string(iter->first);
00171 }
00172 }
00173
00174 return string();
00175 }
00176
00177
00178
00179 static void s_AddDefaultArguments(CPluginCommand& args)
00180 {
00181 args.AddArgument("file", "File name", CPluginArg::eInputFile);
00182 args.AddDefaultArgument("fmt", "Format",
00183 CPluginArg::eString,
00184 s_GetStringFromFormat(CDataPlugin_FileImport::eFormat_Autodetect));
00185
00186 {{
00187 CRef<CPluginValueConstraint> cons(CPluginValueConstraint::CreateSet());
00188 ITERATE (TFormatMap, iter, sc_Formats) {
00189 *cons, string(iter->first);
00190 }
00191 args.SetConstraint("fmt", *cons);
00192 }}
00193 }
00194
00195
00196 void CDataPlugin_FileImport::GetInfo(CPluginInfoSet& info_set)
00197 {
00198 {{
00199 CPluginInfo& info =
00200 info_set.CreateInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
00201 string(__DATE__) + " " + string(__TIME__),
00202 "CDataPlugin_FileImport",
00203 "File", "Load information from a file", "");
00204 info.SetAffinity(CPluginInfo::eAffinity_foreground);
00205
00206
00207 CPluginCommand& load_args = info.SetDataCommand(CPluginCommand::eCommand_load);
00208 s_AddDefaultArguments(load_args);
00209 }}
00210
00211
00212 {{
00213 CPluginInfo& info =
00214 info_set.CreateInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
00215 string(__DATE__) + " " + string(__TIME__),
00216 "CDataPlugin_FileImport",
00217 "File", "Import information from a file", "");
00218 info.SetAffinity(CPluginInfo::eAffinity_foreground);
00219
00220 CPluginCommand& import_args = info.SetDataCommand(CPluginCommand::eCommand_import);
00221 import_args.AddArgument("project", "Project to Import Into", CPluginArg::eProject);
00222 s_AddDefaultArguments(import_args);
00223 }}
00224
00225 }
00226
00227
00228 CDataPlugin_FileImport::CDataPlugin_FileImport()
00229 {
00230 }
00231
00232
00233 CDataPlugin_FileImport::~CDataPlugin_FileImport()
00234 {
00235 }
00236
00237
00238
00239 void CDataPlugin_FileImport::Import(CPluginMessage& msg)
00240 {
00241 x_Load(msg);
00242 }
00243
00244
00245
00246 void CDataPlugin_FileImport::Load(CPluginMessage& msg)
00247 {
00248 x_Load(msg);
00249 }
00250
00251
00252 void CDataPlugin_FileImport::x_Load(CPluginMessage& msg)
00253 {
00254 const CPluginCommand& args = msg.GetRequest().GetCommand();
00255 CPluginReply& reply = msg.SetReply();
00256
00257 const CGBProjectHandle* project = NULL;
00258 if (args.HasArgument("project")) {
00259 project = args["project"].GetProject();
00260 }
00261
00262 string fname = args["file"].AsString();
00263 if (CFile(fname).Exists()) {
00264
00265 EFormat fmt = eFormat_Unknown;
00266 {{
00267 string fmt_str = args["fmt"].AsString();
00268 fmt = s_GetFormatFromString(fmt_str);
00269 }}
00270 if (fmt == eFormat_Unknown || fmt == eFormat_Autodetect) {
00271 CNcbiIfstream istr(fname.c_str(), ios::binary);
00272 fmt = x_GetFormat(istr);
00273 }
00274
00275 if (fmt == eFormat_Unknown) {
00276 CFileTypeChooserDlg dlg;
00277 ITERATE (TFormatMap, iter, sc_Formats) {
00278 if (iter->second == eFormat_Autodetect) {
00279 continue;
00280 }
00281 dlg.AddType(iter->first);
00282 }
00283 dlg.CenterOnActive();
00284 if (dlg.ShowModal() != eOK) {
00285 reply.SetStatus(eMessageStatus_ignored);
00286 return;
00287 }
00288
00289 string type_str = dlg.GetType();
00290 ITERATE (TFormatMap, iter, sc_Formats) {
00291 if (iter->first == type_str) {
00292 fmt = iter->second;
00293 break;
00294 }
00295 }
00296
00297 if (fmt == eFormat_Unknown) {
00298 reply.AddAction(CPluginReplyAction::e_Error,
00299 "The type you selected was not understood");
00300 return;
00301 }
00302 }
00303
00304 CRef<CPluginMessage> msg;
00305 CPluginUtils::TDispatchFlags disp_flags = CPluginUtils::fDispatchDefaults;
00306
00307 switch (fmt) {
00308
00309
00310
00311 case eFormat_FastA:
00312 {{
00313 CPluginHandle ph =
00314 CPluginRegistry::GetPlugin("CDataPlugin_FileImport_Fasta");
00315 if (ph) {
00316 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00317 if (project) {
00318 cmd = CPluginCommand::eCommand_import;
00319 }
00320 msg = ph.CreateMessage(cmd);
00321 disp_flags |= CPluginUtils::fResolveArgs;
00322 }
00323 }}
00324 break;
00325
00326
00327
00328
00329 case eFormat_AsnBinary:
00330 case eFormat_AsnText:
00331 case eFormat_XML:
00332 {{
00333 CPluginHandle ph =
00334 CPluginRegistry::GetPlugin("CDataPlugin_FileImport_Ncbi");
00335 if (ph) {
00336 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00337 if (project) {
00338 cmd = CPluginCommand::eCommand_import;
00339 }
00340 msg = ph.CreateMessage(cmd);
00341 string serial_fmt;
00342 if (fmt == eFormat_AsnBinary) {
00343 serial_fmt = "ASN.1 Binary";
00344 } else if (fmt == eFormat_AsnText) {
00345 serial_fmt = "ASN.1 Text";
00346 } else {
00347 serial_fmt = "XML";
00348 }
00349 msg->SetRequest().SetCommand()["format"].SetString(serial_fmt);
00350 }
00351 }}
00352 break;
00353
00354
00355
00356
00357 case eFormat_AGP:
00358 {{
00359 CPluginHandle ph =
00360 CPluginRegistry::GetPlugin("CDataPlugin_FileImport_Agp");
00361 if (ph) {
00362 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00363 if (project) {
00364 cmd = CPluginCommand::eCommand_import;
00365 }
00366 msg = ph.CreateMessage(cmd);
00367 disp_flags |= CPluginUtils::fResolveArgs;
00368 }
00369 }}
00370 break;
00371
00372
00373
00374
00375 case eFormat_GTF:
00376 {{
00377 CPluginHandle ph =
00378 CPluginRegistry::GetPlugin("CDataPlugin_FileImport_Gff");
00379 if (ph) {
00380 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00381 if (project) {
00382 cmd = CPluginCommand::eCommand_import;
00383 }
00384 msg = ph.CreateMessage(cmd);
00385 disp_flags |= CPluginUtils::fResolveArgs;
00386 }
00387 }}
00388 break;
00389
00390
00391
00392
00393 case eFormat_Glimmer3:
00394 {{
00395 CPluginHandle ph =
00396 CPluginRegistry::GetPlugin("CDataPlugin_FileImport_Glimmer");
00397 if (ph) {
00398 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00399 if (project) {
00400 cmd = CPluginCommand::eCommand_import;
00401 }
00402 msg = ph.CreateMessage(cmd);
00403 disp_flags |= CPluginUtils::fResolveArgs;
00404 }
00405 }}
00406 break;
00407
00408
00409
00410
00411 case eFormat_NewickTree:
00412 {{
00413 CPluginHandle ph =
00414 CPluginRegistry::GetPlugin("CDataPlugin_FileImport_Newick");
00415 if (ph) {
00416 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00417 if (project) {
00418 cmd = CPluginCommand::eCommand_import;
00419 }
00420 msg = ph.CreateMessage(cmd);
00421 disp_flags |= CPluginUtils::fResolveArgs;
00422 }
00423 }}
00424 break;
00425
00426
00427
00428
00429 case eFormat_Phrap:
00430 {{
00431 CPluginHandle ph =
00432 CPluginRegistry::GetPlugin("CDataPlugin_FileImport_Phrap");
00433 if (ph) {
00434 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00435 if (project) {
00436 cmd = CPluginCommand::eCommand_import;
00437 }
00438 msg = ph.CreateMessage(cmd);
00439 disp_flags |= CPluginUtils::fResolveArgs;
00440 }
00441 }}
00442 break;
00443
00444
00445
00446
00447 case eFormat_TextAlign:
00448 {{
00449 CPluginHandle ph =
00450 CPluginRegistry::GetPlugin("CDataPlugin_FileImport_Alignment");
00451 if (ph) {
00452 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00453 if (project) {
00454 cmd = CPluginCommand::eCommand_import;
00455 }
00456 msg = ph.CreateMessage(cmd);
00457 disp_flags |= CPluginUtils::fResolveArgs;
00458 }
00459 }}
00460 break;
00461
00462
00463
00464
00465 case eFormat_RepeatMasker:
00466 {{
00467 CPluginHandle ph =
00468 CPluginRegistry::GetPlugin( "CDataPlugin_FileImport_RepeatMasker" );
00469 if ( ph ) {
00470 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00471 if ( project ) {
00472 cmd = CPluginCommand::eCommand_import;
00473 }
00474 msg = ph.CreateMessage( cmd );
00475 disp_flags |= CPluginUtils::fResolveArgs;
00476 }
00477 }}
00478 break;
00479
00480
00481
00482
00483 case eFormat_DistanceMatrix:
00484 {{
00485 CPluginHandle ph =
00486 CPluginRegistry::GetPlugin("CDataPlugin_FileImport_DistMat");
00487 if (ph) {
00488 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00489 if (project) {
00490 cmd = CPluginCommand::eCommand_import;
00491 }
00492 msg = ph.CreateMessage(cmd);
00493
00494
00495
00496 }
00497 }}
00498 break;
00499
00500
00501
00502
00503 case eFormat_FiveColumn:
00504 {{
00505 CPluginHandle ph =
00506 CPluginRegistry::GetPlugin("CDataPlugin_FileImport_FiveColumn");
00507 if (ph) {
00508 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00509 if (project) {
00510 cmd = CPluginCommand::eCommand_import;
00511 }
00512 msg = ph.CreateMessage(cmd);
00513 disp_flags |= CPluginUtils::fResolveArgs;
00514 }
00515 }}
00516 break;
00517
00518
00519
00520
00521 case eFormat_Table:
00522 {{
00523 CPluginHandle ph =
00524 CPluginRegistry::GetPlugin("CDataPlugin_TableReader");
00525 if (ph) {
00526 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00527 if (project) {
00528 cmd = CPluginCommand::eCommand_import;
00529 }
00530 msg = ph.CreateMessage(cmd);
00531
00532
00533
00534 }
00535 }}
00536 break;
00537
00538
00539
00540
00541 case eFormat_Taxplot:
00542 {{
00543 CPluginHandle ph =
00544 CPluginRegistry::GetPlugin("CDataPlugin_ShiReader");
00545 if (ph) {
00546 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00547 if (project) {
00548 cmd = CPluginCommand::eCommand_import;
00549 }
00550 msg = ph.CreateMessage(cmd);
00551 disp_flags |= CPluginUtils::fResolveArgs;
00552 }
00553 }}
00554 break;
00555
00556
00557
00558
00559 case eFormat_GeneMark:
00560 {{
00561 CPluginHandle ph =
00562 CPluginRegistry::GetPlugin("CDataPlugin_GeneMark");
00563 if (ph) {
00564 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00565 if (project) {
00566 cmd = CPluginCommand::eCommand_import;
00567 }
00568 msg = ph.CreateMessage(cmd);
00569 disp_flags |= CPluginUtils::fResolveArgs;
00570 }
00571 }}
00572 break;
00573
00574
00575
00576
00577 case eFormat_SNPMarkers:
00578 {{
00579 CPluginHandle ph =
00580 CPluginRegistry::GetPlugin("CDataPlugin_FileImport_Smk");
00581 if (ph) {
00582 CPluginCommand::ECommand cmd = CPluginCommand::eCommand_load;
00583 if (project) {
00584 cmd = CPluginCommand::eCommand_import;
00585 }
00586 msg = ph.CreateMessage(cmd);
00587
00588 disp_flags |= CPluginUtils::fResolveArgs;
00589 }
00590 }}
00591 break;
00592
00593 default:
00594 case eFormat_Unknown:
00595 {{
00596 string str("Cannot guess file type from\n");
00597 str += fname;
00598 reply.AddAction(CPluginReplyAction::e_Error, str);
00599 }}
00600 break;
00601 }
00602
00603 if (msg) {
00604 msg->SetRequest().SetCommand()["file"].SetInputFile(fname);
00605 if (project) {
00606 msg->SetRequest().SetCommand()["project"].SetProject(*project);
00607 }
00608 CPluginUtils::CallPlugin(*msg,
00609 CPluginUtils::eDispatch_Deferred,
00610 disp_flags);
00611 reply.SetStatus(eMessageStatus_success);
00612 } else {
00613 string msg = "Can't interpret file " + fname;
00614 reply.AddAction(CPluginReplyAction::e_Error, msg);
00615 }
00616 } else {
00617
00618 string msg = string("File \"") + fname + string("\" does not exist,");
00619 reply.AddAction(CPluginReplyAction::e_Error, msg);
00620 }
00621 }
00622
00623
00624 CDataPlugin_FileImport::EFormat
00625 CDataPlugin_FileImport::x_GetFormat(CNcbiIfstream& istr)
00626 {
00627
00628
00629
00630
00631
00632
00633
00634
00635
00636
00637 CT_POS_TYPE orig_pos = istr.tellg();
00638
00639 CFormatGuess fg( istr );
00640 CFormatGuess::EFormat fmt = fg.GuessFormat();
00641 switch (fmt) {
00642 case CFormatGuess::eBinaryASN:
00643 return eFormat_AsnBinary;
00644 case CFormatGuess::eTextASN:
00645 return eFormat_AsnText;
00646 case CFormatGuess::eXml:
00647 return eFormat_XML;
00648 case CFormatGuess::eFasta:
00649 return eFormat_FastA;
00650 case CFormatGuess::eRmo:
00651 return eFormat_RepeatMasker;
00652 case CFormatGuess::ePhrapAce:
00653 return eFormat_Phrap;
00654 case CFormatGuess::eAgp:
00655 return eFormat_AGP;
00656 case CFormatGuess::eGtf:
00657 return eFormat_GTF;
00658 case CFormatGuess::eGlimmer3:
00659 return eFormat_Glimmer3;
00660 case CFormatGuess::eNewick:
00661 return eFormat_NewickTree;
00662 case CFormatGuess::eDistanceMatrix:
00663 return eFormat_DistanceMatrix;
00664 case CFormatGuess::eFiveColFeatureTable:
00665 return eFormat_FiveColumn;
00666 case CFormatGuess::eTaxplot:
00667 return eFormat_Taxplot;
00668 case CFormatGuess::eTable:
00669 return eFormat_Table;
00670 #if 0
00671 case CFormatGuess::eSnpMarkers:
00672 return eFormat_SNPMarkers;
00673 #endif
00674
00675 default:
00676 break;
00677 }
00678
00679 return eFormat_Unknown;
00680 }
00681
00682
00683 END_NCBI_SCOPE
00684
00685