/*----------------------------------------------------------------------*\
 | Code to fix metadata originally created with DOCUMENT				|
 |																		|
 | Peter N. Schweitzer (U.S. Geological Survey, Reston, VA 20192)		|
\*----------------------------------------------------------------------*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "keyword.h"
#include "item.h"
#include "stricmp.h"

extern FILE *out;

/*----------------------------------------------------------------------*\
\*----------------------------------------------------------------------*/

/*----------------------------------------------------------------------*\
 | clear_junk: tidy the children of element p, then recurse.
 |
 |   1. Unlink leading children whose key is Wblank.
 |   2. Unlink trailing children whose key is Wblank.
 |   3. If the only remaining child is a text node (key Wunknown) whose
 |      value is "n/a", "not applicable" or "unknown" (compared with
 |      stricmp), drop it by clearing p->child.
 |   4. If the first remaining child is not a text node, apply the same
 |      treatment to every child.
 |
 | Unlinked nodes are not freed here; they are simply no longer
 | reachable from p.
\*----------------------------------------------------------------------*/

static void clear_junk (struct item *p) {
	struct item *q;

	if (p == NULL) return;

	/* Get rid of any leading blank lines.  The q test matters: when  */
	/* every child is blank the walk runs off the end of the list.    */

	q = p->child;
	while (q && q->key == Wblank) {
		p->child = q->next;
		if (q->next) q->next->prev = NULL;
		q = q->next;
		}

	/* Get rid of any trailing blank lines: go to the last child and  */
	/* walk backwards, cutting the list after each earlier sibling.   */

	q = p->child;
	if (q) {
		while (q->next) q = q->next;
		while (q && q->key == Wblank) {
			q = q->prev;
			if (q) q->next = NULL;
			else p->child = NULL;
			}
		}

	/* Now excise the useless data: a lone text child that only says  */
	/* the value is not available.                                    */

	q = p->child;
	if (q && q->key == Wunknown && q->next == NULL && q->d) {
		if ((stricmp (q->d,"n/a") == 0) ||
			(stricmp (q->d,"not applicable") == 0) ||
			(stricmp (q->d,"unknown") == 0))
			p->child = NULL;
		}

	/* Proceed to the non-scalar children, if there are any.  The     */
	/* decision is based on the first child only.                     */

	q = p->child;
	if (q && q->key != Wunknown)
		for (q = p->child; q; q = q->next) clear_junk (q);
	}

/*----------------------------------------------------------------------*\
\*----------------------------------------------------------------------*/

void fixdoc (struct item *root) {
	struct item *p,*q,*r,*u,*v,*w;
	char *s,*t;
	int procedures_used;
	int revisions;
	int reviews;
	int related;
	int references;
	int notes;

	if (root == NULL) return;

	if (out == NULL) out = stderr;

	/*------------------------------------------------------------------*\
	 | Traverse the entire tree looking for elements that contain only	|
	 | "n/a", "not applicable", or "unknown".  Clear those text values.	|
	\*------------------------------------------------------------------*/

	clear_junk (root);

	/*------------------------------------------------------------------*\
	 | Find Procedures_Used within Identification_Information; text in	|
	 | this section should be moved into a Process_Step.				|
	\*------------------------------------------------------------------*/

	procedures_used = key_of ("Procedures_Used");
	if (procedures_used != Wunknown)
		if (p = find_key (root,WIdentification_Information))

			/*----------------------------------------------------------*\
			 | Find each Procedures_Used element						|
			\*----------------------------------------------------------*/

			if (q = find_key (p,procedures_used)) {
				do {
					detach_item (q);
					if (r = q->child) {

						/*--------------------------------------------------*\
						 | Skip over blanks to the text, if any.			|
						\*--------------------------------------------------*/

						while (r && r->key == Wblank) r = r->next;
						if (r)
							if (r->prev)
								if (r->prev->key == Wblank) r->prev = NULL;

						/*--------------------------------------------------*\
						 | If there is some text, create a new Process_Step	|
						 | within the Lineage, and put the text under it.	|
						 | You may need to create a Lineage and possibly a	|
						 | Data_Quality_Information as well.				|
						\*--------------------------------------------------*/

						if (r->key == Wunknown) {

							u = find_key (root,WLineage);
							if (!u) {
								v = find_key (root,WData_Quality_Information);
								if (!v) {
									v = insert_item_after (p);
									v->key = WData_Quality_Information;
									}
								u = add_child (v);
								u->key = WLineage;
								}
							v = add_child (u);
							v->key = WProcess_Step;
							w = add_child (v);
							w->key = WProcess_Description;
							w->child = r;
							while (r) {
								r->parent = w;
								r = r->next;
								}
							}
						}

					/*--------------------------------------------------*\
					 | We can use find_key here because first thing we	|
					 | do when we find it is detach it, so the next		|
					 | occurrence becomes "the first available".		|
					\*--------------------------------------------------*/

					} while (q = find_key (p,procedures_used));
				}

	/*------------------------------------------------------------------*\
	 | Find Revisions within Identification_Information; text in this	|
	 | section should also be moved into a Process_Step.				|
	\*------------------------------------------------------------------*/

	revisions = key_of ("Revisions");
	if (revisions != Wunknown)
		if (p = find_key (root,WIdentification_Information))

			/*----------------------------------------------------------*\
			 | Find each Revisions element								|
			\*----------------------------------------------------------*/

			if (q = find_key (p,revisions)) {
				do {
					detach_item (q);
					if (r = q->child) {

						/*--------------------------------------------------*\
						 | Skip over blanks to the text, if any.			|
						\*--------------------------------------------------*/

						while (r && r->key == Wblank) r = r->next;
						if (r)
							if (r->prev)
								if (r->prev->key == Wblank) r->prev = NULL;

						/*--------------------------------------------------*\
						 | If there is some text, create a new Process_Step	|
						 | within the Lineage, and put the text under it.	|
						 | You may need to create a Lineage and possibly a	|
						 | Data_Quality_Information as well.				|
						\*--------------------------------------------------*/

						if (r->key == Wunknown) {

							u = find_key (root,WLineage);
							if (!u) {
								v = find_key (root,WData_Quality_Information);
								if (!v) {
									v = insert_item_after (p);
									v->key = WData_Quality_Information;
									}
								u = add_child (v);
								u->key = WLineage;
								}
							v = add_child (u);
							v->key = WProcess_Step;
							w = add_child (v);
							w->key = WProcess_Description;
							w->child = r;
							while (r) {
								r->parent = w;
								r = r->next;
								}
							}
						}

					/*--------------------------------------------------*\
					 | We can use find_key here because first thing we	|
					 | do when we find it is detach it, so the next		|
					 | occurrence becomes "the first available".		|
					\*--------------------------------------------------*/

					} while (q = find_key (p,revisions));
				}

	/*------------------------------------------------------------------*\
	 | Find Reviews_Applied_to_Data within Identification_Information;	|
	 | text in this section should also be moved into a Process_Step.	|
	\*------------------------------------------------------------------*/

	reviews = key_of ("Reviews_Applied_to_Data");
	if (reviews != Wunknown)
		if (p = find_key (root,WIdentification_Information))

			/*----------------------------------------------------------*\
			 | Find each Revisions element								|
			\*----------------------------------------------------------*/

			if (q = find_key (p,reviews)) {
				do {
					detach_item (q);
					if (r = q->child) {

						/*--------------------------------------------------*\
						 | Skip over blanks to the text, if any.			|
						\*--------------------------------------------------*/

						while (r && r->key == Wblank) r = r->next;
						if (r)
							if (r->prev)
								if (r->prev->key == Wblank) r->prev = NULL;

						/*--------------------------------------------------*\
						 | If there is some text, create a new Process_Step	|
						 | within the Lineage, and put the text under it.	|
						 | You may need to create a Lineage and possibly a	|
						 | Data_Quality_Information as well.				|
						\*--------------------------------------------------*/

						if (r->key == Wunknown) {

							u = find_key (root,WLineage);
							if (!u) {
								v = find_key (root,WData_Quality_Information);
								if (!v) {
									v = insert_item_after (p);
									v->key = WData_Quality_Information;
									}
								u = add_child (v);
								u->key = WLineage;
								}
							v = add_child (u);
							v->key = WProcess_Step;
							w = add_child (v);
							w->key = WProcess_Description;
							w->child = r;
							while (r) {
								r->parent = w;
								r = r->next;
								}
							}
						}

					/*--------------------------------------------------*\
					 | We can use find_key here because first thing we	|
					 | do when we find it is detach it, so the next		|
					 | occurrence becomes "the first available".		|
					\*--------------------------------------------------*/

					} while (q = find_key (p,reviews));
				}

	/*------------------------------------------------------------------*\
	 | Find Related_Spatial_and_Tabular_Data_Sets within				|
	 | Identification_Information; text in these sections should really	|
	 | be structured as Cross_References, but we cannot expect them to	|
	 | contain any parseable content.  Here we move them into the		|
	 | Other_Citation_Details section of a dummy Cross_Reference.		|
	 |																	|
	 | This is not ideal, but is the most we can assume for now.		|
	\*------------------------------------------------------------------*/

	related = key_of ("Related_Spatial_and_Tabular_Data_Sets");
	if (related != Wunknown)
		if (p = find_key (root,WIdentification_Information))

			/*----------------------------------------------------------*\
			 | Find each Related_Spatial_and_Tabular_Data_Sets element	|
			\*----------------------------------------------------------*/

			if (q = find_key (p,related)) {
				do {
					detach_item (q);
					if (r = q->child) {

						/*--------------------------------------------------*\
						 | Skip over blanks to the text, if any.			|
						\*--------------------------------------------------*/

						while (r && r->key == Wblank) r = r->next;
						if (r)
							if (r->prev)
								if (r->prev->key == Wblank) r->prev = NULL;

						/*--------------------------------------------------*\
						 | If there is any text, create a Cross_Reference	|
						 | within the Identification_Information, add a		|
						 | Citation_Information under it, and put the whole	|
						 | text under its Other_Citation_Details.			|
						\*--------------------------------------------------*/

						if (r->key == Wunknown) {

							u = add_child (p);
							u->key = WCross_Reference;
							v = add_child (u);
							v->key = WCitation_Information;
							w = add_child (v);
							w->key = WOther_Citation_Details;
							w->child = r;
							while (r) {
								r->parent = w;
								r = r->next;
								}
							}
						}

					/*--------------------------------------------------*\
					 | We can use find_key here because first thing we	|
					 | do when we find it is detach it, so the next		|
					 | occurrence becomes "the first available".		|
					\*--------------------------------------------------*/

					} while (q = find_key (p,related));
				}

	/*------------------------------------------------------------------*\
	 | Find Other_References_Cited within Identification_Information;	|
	 | text in these sections should really be structured as normal		|
	 | Cross_References, but we cannot expect them to contain any		|
	 | parseable content.  Again we move them into the unstructured		|
	 | Other_Citation_Details section of a dummy Cross_Reference.		|
	 |																	|
	 | This is not ideal, but is the most we can assume for now.		|
	\*------------------------------------------------------------------*/

	references = key_of ("Other_References_Cited");
	if (references != Wunknown)
		if (p = find_key (root,WIdentification_Information))

			/*----------------------------------------------------------*\
			 | Find each Other_References_Cited element					|
			\*----------------------------------------------------------*/

			if (q = find_key (p,references)) {
				do {
					detach_item (q);
					if (r = q->child) {

						/*--------------------------------------------------*\
						 | Skip over blanks to the text, if any.			|
						\*--------------------------------------------------*/

						while (r && r->key == Wblank) r = r->next;
						if (r)
							if (r->prev)
								if (r->prev->key == Wblank) r->prev = NULL;

						/*--------------------------------------------------*\
						 | If there is any text, create a Cross_Reference	|
						 | within the Identification_Information, add a		|
						 | Citation_Information under it, and put the whole	|
						 | text under its Other_Citation_Details.			|
						\*--------------------------------------------------*/

						if (r->key == Wunknown) {

							u = add_child (p);
							u->key = WCross_Reference;
							v = add_child (u);
							v->key = WCitation_Information;
							w = add_child (v);
							w->key = WOther_Citation_Details;
							w->child = r;
							while (r) {
								r->parent = w;
								r = r->next;
								}
							}
						}

					/*--------------------------------------------------*\
					 | We can use find_key here because first thing we	|
					 | do when we find it is detach it, so the next		|
					 | occurrence becomes "the first available".		|
					\*--------------------------------------------------*/

					} while (q = find_key (p,references));
				}

	/*------------------------------------------------------------------*\
	 | Find Notes within Identification_Information; text in this		|
	 | section should be added to Supplemental_Information.				|
	\*------------------------------------------------------------------*/

	notes = key_of ("Notes");
	if (notes != Wunknown)
		if (p = find_key (root,WIdentification_Information))

			/*----------------------------------------------------------*\
			 | Find each Notes element									|
			\*----------------------------------------------------------*/

			if (q = find_key (p,notes)) {
				do {
					detach_item (q);
					if (r = q->child) {

						/*--------------------------------------------------*\
						 | Skip over blanks to the text, if any.			|
						\*--------------------------------------------------*/

						while (r && r->key == Wblank) r = r->next;
						if (r)
							if (r->prev)
								if (r->prev->key == Wblank) r->prev = NULL;

						/*--------------------------------------------------*\
						 | If there is some text, append it to the existing	|
						 | Supplemental_Information element (create this if	|
						 | it does not already exist).						|
						\*--------------------------------------------------*/

						if (r->key == Wunknown) {

							u = find_key (p,WSupplemental_Information);
							if (!u) {
								u = add_child (p);
								u->key = WSupplemental_Information;
								}
							if (v = u->child) {
								while (v->next) v = v->next;
								v->next = r;
								r->prev = v;
								}
							else u->child = r;

							while (r) {
								r->parent = u;
								r = r->next;
								}
							}
						}

					/*--------------------------------------------------*\
					 | We can use find_key here because first thing we	|
					 | do when we find it is detach it, so the next		|
					 | occurrence becomes "the first available".		|
					\*--------------------------------------------------*/

					} while (q = find_key (p,notes));
				}

	/*------------------------------------------------------------------*\
	 | Delete meaningless attribute information							|
	 | Within each Detailed_Description, for each Attribute, if the		|
	 | value of Attribute_Label is one of the following, detach the		|
	 | entire Attribute: - AREA PERIMETER LENGTH FNODE# TNODE# cover#	|
	\*------------------------------------------------------------------*/

	if (p = find_key (root,WEntity_and_Attribute_Information))
		if (p = find_key (p,WDetailed_Description))
			do {
				if (q = find_key (p,WAttribute))
					while (q) {
						int removed = 0;
						u = NULL;
						if (r = find_key (q,WAttribute_Label))
							if (r = r->child)
								if (s = r->d) {
									if (*s) t = s + strlen (s) - 1; else t = s;
									if ((strcmp (s,"-") == 0) ||
										(strcmp (s,"AREA") == 0) ||
										(strcmp (s,"PERIMETER") == 0) ||
										(strcmp (s,"LENGTH") == 0) ||
										(*t == '#')) {
										u = q->next;
										detach_item (q);
										removed = 1;
										}
									}
						if (removed) q = u; else q = q->next;
						}
				} while (p = find_next_key (p,WDetailed_Description));

	/*------------------------------------------------------------------*\
	 | Remove useless security information that DOCUMENT puts in the	|
	 | Identification_Information and Metadata_Reference_Information.	|
	\*------------------------------------------------------------------*/

	if (p = find_key (root,WIdentification_Information))
		if (p = find_key (p,WSecurity_Information)) {
			int strike = 0;
			if (q = find_key (p,WSecurity_Classification_System))
				if (r = q->child)
					if (s = r->d)
						if (stricmp (s,"None") == 0) strike++;
			if (q = find_key (p,WSecurity_Classification))
				if (r = q->child)
					if (s = r->d)
						if (stricmp (s,"UNCLASSIFIED") == 0) strike++;
			if (q = find_key (p,WSecurity_Handling_Description))
				if (r = q->child)
					if (s = r->d)
						if (stricmp (s,"None") == 0) strike++;
			if (strike == 3) detach_item (p);
			}

	if (p = find_key (root,WMetadata_Reference_Information))
		if (p = find_key (p,WMetadata_Security_Information)) {
			int strike = 0;
			if (q = find_key (p,WMetadata_Security_Classification_System))
				if (r = q->child)
					if (s = r->d)
						if (stricmp (s,"None") == 0) strike++;
			if (q = find_key (p,WMetadata_Security_Classification))
				if (r = q->child)
					if (s = r->d)
						if (stricmp (s,"UNCLASSIFIED") == 0) strike++;
			if (q = find_key (p,WMetadata_Security_Handling_Description))
				if (r = q->child)
					if (s = r->d)
						if (stricmp (s,"None") == 0) strike++;
			if (strike == 3) detach_item (p);
			}


	/*------------------------------------------------------------------*\
	 | Prune the tree from the root to remove all of the empty stuff.	|
	\*------------------------------------------------------------------*/

	prune (root);

	}

/*----------------------------------------------------------------------*\
\*----------------------------------------------------------------------*/