/*----------------------------------------------------------------------*\ | Code to fix metadata originally created with DOCUMENT | | | | Peter N. Schweitzer (U.S. Geological Survey, Reston, VA 20192) | \*----------------------------------------------------------------------*/ #include #include #include #include #include "keyword.h" #include "item.h" #include "stricmp.h" extern FILE *out; /*----------------------------------------------------------------------*\ \*----------------------------------------------------------------------*/ static void clear_junk (struct item *p) { struct item *q; /* Get rid of any leading blank lines */ if (q = p->child) while (q->key == Wblank) { p->child = q->next; if (q->next) q->next->prev = NULL; q = q->next; } /* Get rid of any trailing blank lines */ if (q = p->child) { while (q->next) q = q->next; while (q && q->key == Wblank) if (q = q->prev) q->next = NULL; else { p->child = NULL; break; } } /* Now excise the useless data */ if (q = p->child) if (q->key == Wunknown && q->next == NULL) if (q->d) if ((stricmp (q->d,"n/a") == 0) || (stricmp (q->d,"not applicable") == 0) || (stricmp (q->d,"unknown") == 0)) p->child = NULL; /* Proceed to the non-scalar children, if there are any */ if (q = p->child) if (q->key != Wunknown) for (q=p->child; q; q=q->next) clear_junk (q); } /*----------------------------------------------------------------------*\ \*----------------------------------------------------------------------*/ void fixdoc (struct item *root) { struct item *p,*q,*r,*u,*v,*w; char *s,*t; int procedures_used; int revisions; int reviews; int related; int references; int notes; if (root == NULL) return; if (out == NULL) out = stderr; /*------------------------------------------------------------------*\ | Traverse the entire tree looking for elements that contain only | | "n/a", "not applicable", or "unknown". Clear those text values. | \*------------------------------------------------------------------*/ clear_junk (root); /*------------------------------------------------------------------*\ | Find Procedures_Used within Identification_Information; text in | | this section should be moved into a Process_Step. | \*------------------------------------------------------------------*/ procedures_used = key_of ("Procedures_Used"); if (procedures_used != Wunknown) if (p = find_key (root,WIdentification_Information)) /*----------------------------------------------------------*\ | Find each Procedures_Used element | \*----------------------------------------------------------*/ if (q = find_key (p,procedures_used)) { do { detach_item (q); if (r = q->child) { /*--------------------------------------------------*\ | Skip over blanks to the text, if any. | \*--------------------------------------------------*/ while (r && r->key == Wblank) r = r->next; if (r) if (r->prev) if (r->prev->key == Wblank) r->prev = NULL; /*--------------------------------------------------*\ | If there is some text, create a new Process_Step | | within the Lineage, and put the text under it. | | You may need to create a Lineage and possibly a | | Data_Quality_Information as well. | \*--------------------------------------------------*/ if (r->key == Wunknown) { u = find_key (root,WLineage); if (!u) { v = find_key (root,WData_Quality_Information); if (!v) { v = insert_item_after (p); v->key = WData_Quality_Information; } u = add_child (v); u->key = WLineage; } v = add_child (u); v->key = WProcess_Step; w = add_child (v); w->key = WProcess_Description; w->child = r; while (r) { r->parent = w; r = r->next; } } } /*--------------------------------------------------*\ | We can use find_key here because first thing we | | do when we find it is detach it, so the next | | occurrence becomes "the first available". | \*--------------------------------------------------*/ } while (q = find_key (p,procedures_used)); } /*------------------------------------------------------------------*\ | Find Revisions within Identification_Information; text in this | | section should also be moved into a Process_Step. | \*------------------------------------------------------------------*/ revisions = key_of ("Revisions"); if (revisions != Wunknown) if (p = find_key (root,WIdentification_Information)) /*----------------------------------------------------------*\ | Find each Revisions element | \*----------------------------------------------------------*/ if (q = find_key (p,revisions)) { do { detach_item (q); if (r = q->child) { /*--------------------------------------------------*\ | Skip over blanks to the text, if any. | \*--------------------------------------------------*/ while (r && r->key == Wblank) r = r->next; if (r) if (r->prev) if (r->prev->key == Wblank) r->prev = NULL; /*--------------------------------------------------*\ | If there is some text, create a new Process_Step | | within the Lineage, and put the text under it. | | You may need to create a Lineage and possibly a | | Data_Quality_Information as well. | \*--------------------------------------------------*/ if (r->key == Wunknown) { u = find_key (root,WLineage); if (!u) { v = find_key (root,WData_Quality_Information); if (!v) { v = insert_item_after (p); v->key = WData_Quality_Information; } u = add_child (v); u->key = WLineage; } v = add_child (u); v->key = WProcess_Step; w = add_child (v); w->key = WProcess_Description; w->child = r; while (r) { r->parent = w; r = r->next; } } } /*--------------------------------------------------*\ | We can use find_key here because first thing we | | do when we find it is detach it, so the next | | occurrence becomes "the first available". | \*--------------------------------------------------*/ } while (q = find_key (p,revisions)); } /*------------------------------------------------------------------*\ | Find Reviews_Applied_to_Data within Identification_Information; | | text in this section should also be moved into a Process_Step. | \*------------------------------------------------------------------*/ reviews = key_of ("Reviews_Applied_to_Data"); if (reviews != Wunknown) if (p = find_key (root,WIdentification_Information)) /*----------------------------------------------------------*\ | Find each Revisions element | \*----------------------------------------------------------*/ if (q = find_key (p,reviews)) { do { detach_item (q); if (r = q->child) { /*--------------------------------------------------*\ | Skip over blanks to the text, if any. | \*--------------------------------------------------*/ while (r && r->key == Wblank) r = r->next; if (r) if (r->prev) if (r->prev->key == Wblank) r->prev = NULL; /*--------------------------------------------------*\ | If there is some text, create a new Process_Step | | within the Lineage, and put the text under it. | | You may need to create a Lineage and possibly a | | Data_Quality_Information as well. | \*--------------------------------------------------*/ if (r->key == Wunknown) { u = find_key (root,WLineage); if (!u) { v = find_key (root,WData_Quality_Information); if (!v) { v = insert_item_after (p); v->key = WData_Quality_Information; } u = add_child (v); u->key = WLineage; } v = add_child (u); v->key = WProcess_Step; w = add_child (v); w->key = WProcess_Description; w->child = r; while (r) { r->parent = w; r = r->next; } } } /*--------------------------------------------------*\ | We can use find_key here because first thing we | | do when we find it is detach it, so the next | | occurrence becomes "the first available". | \*--------------------------------------------------*/ } while (q = find_key (p,reviews)); } /*------------------------------------------------------------------*\ | Find Related_Spatial_and_Tabular_Data_Sets within | | Identification_Information; text in these sections should really | | be structured as Cross_References, but we cannot expect them to | | contain any parseable content. Here we move them into the | | Other_Citation_Details section of a dummy Cross_Reference. | | | | This is not ideal, but is the most we can assume for now. | \*------------------------------------------------------------------*/ related = key_of ("Related_Spatial_and_Tabular_Data_Sets"); if (related != Wunknown) if (p = find_key (root,WIdentification_Information)) /*----------------------------------------------------------*\ | Find each Related_Spatial_and_Tabular_Data_Sets element | \*----------------------------------------------------------*/ if (q = find_key (p,related)) { do { detach_item (q); if (r = q->child) { /*--------------------------------------------------*\ | Skip over blanks to the text, if any. | \*--------------------------------------------------*/ while (r && r->key == Wblank) r = r->next; if (r) if (r->prev) if (r->prev->key == Wblank) r->prev = NULL; /*--------------------------------------------------*\ | If there is any text, create a Cross_Reference | | within the Identification_Information, add a | | Citation_Information under it, and put the whole | | text under its Other_Citation_Details. | \*--------------------------------------------------*/ if (r->key == Wunknown) { u = add_child (p); u->key = WCross_Reference; v = add_child (u); v->key = WCitation_Information; w = add_child (v); w->key = WOther_Citation_Details; w->child = r; while (r) { r->parent = w; r = r->next; } } } /*--------------------------------------------------*\ | We can use find_key here because first thing we | | do when we find it is detach it, so the next | | occurrence becomes "the first available". | \*--------------------------------------------------*/ } while (q = find_key (p,related)); } /*------------------------------------------------------------------*\ | Find Other_References_Cited within Identification_Information; | | text in these sections should really be structured as normal | | Cross_References, but we cannot expect them to contain any | | parseable content. Again we move them into the unstructured | | Other_Citation_Details section of a dummy Cross_Reference. | | | | This is not ideal, but is the most we can assume for now. | \*------------------------------------------------------------------*/ references = key_of ("Other_References_Cited"); if (references != Wunknown) if (p = find_key (root,WIdentification_Information)) /*----------------------------------------------------------*\ | Find each Other_References_Cited element | \*----------------------------------------------------------*/ if (q = find_key (p,references)) { do { detach_item (q); if (r = q->child) { /*--------------------------------------------------*\ | Skip over blanks to the text, if any. | \*--------------------------------------------------*/ while (r && r->key == Wblank) r = r->next; if (r) if (r->prev) if (r->prev->key == Wblank) r->prev = NULL; /*--------------------------------------------------*\ | If there is any text, create a Cross_Reference | | within the Identification_Information, add a | | Citation_Information under it, and put the whole | | text under its Other_Citation_Details. | \*--------------------------------------------------*/ if (r->key == Wunknown) { u = add_child (p); u->key = WCross_Reference; v = add_child (u); v->key = WCitation_Information; w = add_child (v); w->key = WOther_Citation_Details; w->child = r; while (r) { r->parent = w; r = r->next; } } } /*--------------------------------------------------*\ | We can use find_key here because first thing we | | do when we find it is detach it, so the next | | occurrence becomes "the first available". | \*--------------------------------------------------*/ } while (q = find_key (p,references)); } /*------------------------------------------------------------------*\ | Find Notes within Identification_Information; text in this | | section should be added to Supplemental_Information. | \*------------------------------------------------------------------*/ notes = key_of ("Notes"); if (notes != Wunknown) if (p = find_key (root,WIdentification_Information)) /*----------------------------------------------------------*\ | Find each Notes element | \*----------------------------------------------------------*/ if (q = find_key (p,notes)) { do { detach_item (q); if (r = q->child) { /*--------------------------------------------------*\ | Skip over blanks to the text, if any. | \*--------------------------------------------------*/ while (r && r->key == Wblank) r = r->next; if (r) if (r->prev) if (r->prev->key == Wblank) r->prev = NULL; /*--------------------------------------------------*\ | If there is some text, append it to the existing | | Supplemental_Information element (create this if | | it does not already exist). | \*--------------------------------------------------*/ if (r->key == Wunknown) { u = find_key (p,WSupplemental_Information); if (!u) { u = add_child (p); u->key = WSupplemental_Information; } if (v = u->child) { while (v->next) v = v->next; v->next = r; r->prev = v; } else u->child = r; while (r) { r->parent = u; r = r->next; } } } /*--------------------------------------------------*\ | We can use find_key here because first thing we | | do when we find it is detach it, so the next | | occurrence becomes "the first available". | \*--------------------------------------------------*/ } while (q = find_key (p,notes)); } /*------------------------------------------------------------------*\ | Delete meaningless attribute information | | Within each Detailed_Description, for each Attribute, if the | | value of Attribute_Label is one of the following, detach the | | entire Attribute: - AREA PERIMETER LENGTH FNODE# TNODE# cover# | \*------------------------------------------------------------------*/ if (p = find_key (root,WEntity_and_Attribute_Information)) if (p = find_key (p,WDetailed_Description)) do { if (q = find_key (p,WAttribute)) while (q) { int removed = 0; u = NULL; if (r = find_key (q,WAttribute_Label)) if (r = r->child) if (s = r->d) { if (*s) t = s + strlen (s) - 1; else t = s; if ((strcmp (s,"-") == 0) || (strcmp (s,"AREA") == 0) || (strcmp (s,"PERIMETER") == 0) || (strcmp (s,"LENGTH") == 0) || (*t == '#')) { u = q->next; detach_item (q); removed = 1; } } if (removed) q = u; else q = q->next; } } while (p = find_next_key (p,WDetailed_Description)); /*------------------------------------------------------------------*\ | Remove useless security information that DOCUMENT puts in the | | Identification_Information and Metadata_Reference_Information. | \*------------------------------------------------------------------*/ if (p = find_key (root,WIdentification_Information)) if (p = find_key (p,WSecurity_Information)) { int strike = 0; if (q = find_key (p,WSecurity_Classification_System)) if (r = q->child) if (s = r->d) if (stricmp (s,"None") == 0) strike++; if (q = find_key (p,WSecurity_Classification)) if (r = q->child) if (s = r->d) if (stricmp (s,"UNCLASSIFIED") == 0) strike++; if (q = find_key (p,WSecurity_Handling_Description)) if (r = q->child) if (s = r->d) if (stricmp (s,"None") == 0) strike++; if (strike == 3) detach_item (p); } if (p = find_key (root,WMetadata_Reference_Information)) if (p = find_key (p,WMetadata_Security_Information)) { int strike = 0; if (q = find_key (p,WMetadata_Security_Classification_System)) if (r = q->child) if (s = r->d) if (stricmp (s,"None") == 0) strike++; if (q = find_key (p,WMetadata_Security_Classification)) if (r = q->child) if (s = r->d) if (stricmp (s,"UNCLASSIFIED") == 0) strike++; if (q = find_key (p,WMetadata_Security_Handling_Description)) if (r = q->child) if (s = r->d) if (stricmp (s,"None") == 0) strike++; if (strike == 3) detach_item (p); } /*------------------------------------------------------------------*\ | Prune the tree from the root to remove all of the empty stuff. | \*------------------------------------------------------------------*/ prune (root); } /*----------------------------------------------------------------------*\ \*----------------------------------------------------------------------*/