/* RCS "$Id$" keyword string recording the checked-in revision of this file.
 * It is not referenced anywhere in the visible portion of this file. */
static char rcsver[] = "$Id: dataset.c,v 1.5 2003/06/26 14:42:47 schinder Exp $";
 
/**
 ** $Source: /data/cvs/vanilla/dataset.c,v $
 **
 ** $Log: dataset.c,v $
 ** Revision 1.5  2003/06/26 14:42:47  schinder
 ** Fixes, for upcase/lowcase table names in DATASET problem, and for subtraction of pointers to get strlen problem.  Also, TES specific files have been removed from CIRS build
 **
 ** Revision 1.4  2003/06/23 22:34:59  schinder
 ** Fixed stupid reverse comparison so that putting a full path to a DATASET now works again
 **
 ** Revision 1.3  2003/04/30 23:23:06  schinder
 ** Fixes to dataset.c
 **
 ** Revision 1.2  2003/04/30 21:29:31  schinder
 ** Changes to force absolute pathnames in dataset.c
 **
 ** Revision 1.1.1.1  2003/02/25 18:54:45  schinder
 ** Adding vanilla
 **
 ** Revision 1.14  2002/06/14 15:51:59  saadat
 ** Logging code fixed.
 **
 ** Revision 1.13  2002/06/13 20:25:25  saadat
 ** Added a new command-line option, "-files" which allows vanilla to
 ** interpret a list of files specified on the command-line as a table.
 **
 ** Logging code still needs fixing and the indexing code needs checks put
 ** in to avoid users use indexing options incorrectly.
 **
 ** Revision 1.12  2002/06/06 02:20:13  saadat
 ** Added support for detached label data files.
 **
 ** Revision 1.11  2002/06/04 23:55:40  saadat
 ** Absolute paths starting with drive letters were not working correctly in
 ** DOS/Windows DATASET files.
 **
 ** Revision 1.10  2002/01/12 01:44:00  saadat
 ** vanilla failed on a non-existent directory in the dataset.
 **
 ** Revision 1.9  2001/11/28 20:22:39  saadat
 ** Merged branch vanilla-3-3-13-key-alias-fix at vanilla-3-3-13-4 with
 ** vanilla-3-4-6.
 **
 ** Revision 1.8  2000/08/04 01:50:52  saadat
 ** Rename STATS to LOGGING.
 ** Rename hstats.c to logging.c
 **
 ** Revision 1.7  2000/08/04 00:52:14  saadat
 ** Added a preliminary version of logging/statistics gathering on normal
 ** termination through exit() and on receipt of the following signals:
 **              SIGSEGV, SIGBUS, SIGPIPE, SIGXCPU, SIGXFSZ
 **
 ** Revision 1.6.2.1  2001/05/22 03:58:07  saadat
 ** Windows 95 stat() fails if the path is suffixed with a "/" (e.g. stat
 ** on "/vanilla/data/" will yield -1). Windows NT does not complain
 ** however.
 **
 ** Revision 1.6  2000/07/12 10:34:39  gorelick
 ** Moves SortFiles out of LoadFilenames
 **
 ** Revision 1.5  2000/07/07 17:15:22  saadat
 ** Added support for Windows MS Visual C++ memory-mapped file support.
 ** Removed some unused variables from various files.
 **
 ** Revision 1.4  2000/05/26 15:04:51  asbms
 ** Added fixes for smoother return from errors
 **
 ** Revision 1.3  2000/05/25 02:24:23  saadat
 ** Fixed IRTM temperature generation.
 **
 ** Revision 1.2  1999/11/19 21:19:43  gorelick
 ** Version 3.1, post PDS delivery of 3.0
 **/

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "header.h"
#include "proto.h"

#ifdef _WINDOWS
#include "dos.h"
#include <direct.h>
#else
#include <libgen.h>
#endif

#include "system.h"


/*
** Forward declarations for routines used by LoadDataset() below.
*/

/* Defined later in this file: adds (or extends) table <name> using the
** files found under <path>; returns 0 on success, 1 on failure. */
int LoadTable(DATASET *dataset, char *path, char *name);

/* Defined later in this file: strips trailing characters that appear in
** <trim_chars> from <str>, in place. */
void rtrim(char *str, char *trim_chars);

/* Not defined in the visible portion of this file. Used below as a
** boolean test on a path (presumably "is this an absolute path?"). */
int isAbsPathName(const char *path);

/* Not defined in the visible portion of this file. Only called in the
** _WINDOWS build, as a boolean test on a path (presumably "is this a
** drive-relative path such as C:foo?" -- confirm against its definition). */
int isDriveRelPathName(const char *path);

/**
 ** Load dataset stuff
 **     Load all the table names, and all the files for each table.
 **
 ** <fname> may be given in any of these forms:
 **
 **     table
 **     path/table
 **     path
 **
 ** The DATASET file that is opened is "<dir>/<file>", falling back to
 ** "<dir>/<file>.LST" and then "<dir>/<file>.TXT"; <file> is "DATASET"
 ** when <fname> names a directory.  Every line of that file is one of:
 **
 **     something/      another dataset directory, loaded recursively
 **     some/path/tbl   an explicit path to a table
 **     tbl             a table that lives in the dataset directory
 **
 ** Blank lines are ignored.
 **
 ** Parameters:
 **     dataset     dataset to add to, or NULL to allocate a new one
 **     fname       table name, path to a table, or path to a directory
 **
 ** Returns the dataset, or NULL on failure (bad path, unreadable
 ** DATASET file, a path that does not fit the internal buffers, or a
 ** table that LoadTable() rejects).  Failures of recursively loaded
 ** datasets are not propagated.
 **
 ** NOTE: the strdup()'d copies of <fname> made here are never freed.
 ** <dir> may alias them and it is not visible from this file whether
 ** find_file()/LoadFilenames() keep the pointers they are given, so the
 ** leak is left in place rather than risking a dangling pointer.
 **/

/*
** Builds "<dir>/<file><suffix>" in dst.
** Returns 0 on success, -1 if the result did not fit in dstlen bytes.
*/
static int
dataset_join_path(char *dst, size_t dstlen,
                  const char *dir, const char *file, const char *suffix)
{
    int n = snprintf(dst, dstlen, "%s/%s%s", dir, file, suffix);

    return (n < 0 || (size_t)n >= dstlen) ? -1 : 0;
}

DATASET *
LoadDataset(DATASET *dataset, char *fname)
{
    static const char *suffixes[] = { "", ".LST", ".TXT" };
    char *path, *file, *dir, *tbl_name, *dir_name;
    struct stat sbuf;
    char tmppath[512], newpath[256], buf[256], tblbuf[256];
    FILE *fp;
    int i, path_ok;

#ifdef _WINDOWS
    char drvcwd[512];
#endif

#ifdef CIRS
#define CIRS_PATH_SIZE 512
    char pwd[CIRS_PATH_SIZE];
    char buf2[CIRS_PATH_SIZE];
    char buf3[CIRS_PATH_SIZE];
#endif

    if (dataset == NULL) {
        dataset = calloc(1, sizeof(DATASET));
        if (dataset == NULL) {
            fprintf(stderr, "Out of memory loading dataset: %s\n", fname);
            return (NULL);
        }
        dataset->tables = new_list();
        dataset->tablenames = new_list();
    }

#ifdef CIRS
    if (getcwd(pwd, CIRS_PATH_SIZE) == NULL) {
        /* pwd would otherwise be used uninitialized below */
        (void) strcpy(pwd, ".");
    }
#endif

    path = strdup(fname);
    if (path == NULL) {
        fprintf(stderr, "Out of memory loading dataset: %s\n", fname);
        return (NULL);
    }

    /**
    ** Split fname into path/file
    **/
    if ((file = strrchr(path, '/')) == NULL) {
#ifdef CIRS
        dir = pwd;
#else
        dir = ".";
#endif
        file = path;
    } else {
#ifdef CIRS
        if (stat(fname, &sbuf) == 0) {
            if (!S_ISDIR(sbuf.st_mode)) { /* fname isn't a directory */
                *file = '\0';
                file++;
            }
        } else {
            /* Not fatal here: the stat()/find_file() check below decides */
            fprintf(stderr,"Unable to stat %s, bailing out\n",fname);
        }

        /* CIRS builds always work with absolute directory names */
        if (isAbsPathName(path)) {
            dir = path;
        } else if (dataset_join_path(buf3, sizeof(buf3), pwd, path, "") == 0) {
            dir = buf3;
        } else {
            fprintf(stderr, "Path too long: %s/%s\n", pwd, path);
            return (NULL);
        }
#else
        *file = '\0';
        file++;
        dir = path;
#endif
    }

    /*
    ** Remove trailing slashes and white spaces.
    ** Windows 95/98 chokes on stat() on a path with
    ** a slash at the end.
    */
    fname = strdup(fname);
    if (fname == NULL) {
        fprintf(stderr, "Out of memory loading dataset: %s\n", path);
        return (NULL);
    }
    rtrim(fname, "\\/ \t");

    if (stat(fname, &sbuf) != 0) {
        fname = find_file(fname);
        if (stat(fname, &sbuf) != 0) {
            fprintf(stderr, "Bad path: %s\n", fname);
            return (NULL);
        }
    }

    if ((sbuf.st_mode & S_IFDIR) != 0) {
#ifndef CIRS
        dir = fname;
#endif
        file = "DATASET";
    }

    /*
    ** Try <dir>/<file>, then the .LST and .TXT variants.  If none of
    ** them exists fname is left at the last candidate, so the fopen()
    ** below reports it.
    */
    for (i = 0; i < (int)(sizeof(suffixes) / sizeof(suffixes[0])); i++) {
        if (dataset_join_path(newpath, sizeof(newpath),
                              dir, file, suffixes[i]) != 0) {
            fprintf(stderr, "Path too long: %s/%s%s\n", dir, file, suffixes[i]);
            return (NULL);
        }
        fname = find_file(newpath);
        if (access(fname, F_OK) == 0) {
            break;
        }
    }

#ifdef LOGGING
    invocation_log_dataset(fname);
#endif /* LOGGING */

    /**
    ** Open the DATASET file and extract its contents
    **/
    if ((fp = fopen(fname, "r")) == NULL) {
        fprintf(stderr, "Unable to open DATASET file: %s\n", fname);
        return (NULL);
    }
    while (fgets(buf, sizeof(buf), fp) != NULL) {
        rtrim(buf, " \t\r\n");

        /*
        ** Skip blank lines; indexing buf[strlen(buf) - 1] on an empty
        ** string would read before the start of the buffer.
        */
        if (buf[0] == '\0') {
            continue;
        }

        if (buf[strlen(buf) - 1] == '/') {
            /**
            ** This is a path to another dataset in another directory
            **/
#ifdef CIRS
            if (isAbsPathName(buf)) { /* buf contains an absolute path */
                /* buf (256 bytes) always fits in buf2 (512 bytes) */
                (void) strcpy(buf2, buf);
                path_ok = 1;
            } else { /* buf contains a relative path */
                path_ok = (dataset_join_path(buf2, sizeof(buf2),
                                             dir, buf, "") == 0);
            }
            if (path_ok) {
                LoadDataset(dataset, buf2);
            } else {
                /* nested dataset failures are not fatal, so just skip it */
                fprintf(stderr, "Path too long, skipping dataset: %s/%s\n",
                        dir, buf);
            }
#else
            LoadDataset(dataset, buf);
#endif
        } else if (strchr(buf, '/')) {
            /**
            ** This is an explicit path to another table
            **/

            /*
            ** basename()/dirname() are allowed to modify their argument
            ** and to return static storage, so keep a private copy of
            ** the table name before calling dirname() on the same buffer.
            */
            tbl_name = (char *)basename(buf);
            snprintf(tblbuf, sizeof(tblbuf), "%s", tbl_name);
            tbl_name = tblbuf;
            dir_name = (char *)dirname(buf);

            if (isAbsPathName(buf)) {
                if (LoadTable(dataset, dir_name, tbl_name)) {
                    fclose(fp);
                    return (NULL);
                }
            } else {
#ifdef _WINDOWS
                if (isDriveRelPathName(buf)) {
                    /* resolve against the current directory of that drive */
                    path_ok = 0;
                    if (_getdcwd(toupper((unsigned char)buf[0]) - 'A' + 1,
                                 drvcwd, (int)sizeof(drvcwd) - 1) != NULL) {
                        path_ok = (dataset_join_path(tmppath, sizeof(tmppath),
                                                     drvcwd, dir_name, "") == 0);
                    }
                } else {
                    /* plain relative path: resolve against the dataset dir */
                    path_ok = (dataset_join_path(tmppath, sizeof(tmppath),
                                                 dir, dir_name, "") == 0);
                }
#else  /* UNIX */
                path_ok = (dataset_join_path(tmppath, sizeof(tmppath),
                                             dir, dir_name, "") == 0);
#endif /* _WINDOWS */
                if (!path_ok) {
                    fprintf(stderr, "Unable to build path for table: %s/%s\n",
                            dir_name, tbl_name);
                    fclose(fp);
                    return (NULL);
                }
                if (LoadTable(dataset, tmppath, tbl_name)) {
                    fclose(fp);
                    return (NULL);
                }
            }
        } else {
            /**
            ** This is the name of a table, load it directly.
            **/
#ifdef CIRS
            /* We had a problem where a few of the directories in a
               dataset had the uppercase name of a table in DATASET.TXT,
               while the rest had the lower case name.  vanilla treated
               it as two separate datasets (with the same files, since
               the loading of files is done in a case insensitive way).
               To avoid that, here we upcase all table names prior to
               giving them to LoadTable */
            char *s1;

            for (s1 = buf; *s1; s1++) {
                /* toupper() requires an unsigned char value (or EOF) */
                *s1 = (char) toupper((unsigned char) *s1);
            }
#endif
            if (LoadTable(dataset, dir, buf)) {
                fclose(fp);
                return (NULL);
            }
        }
    }
    fclose(fp);
    return (dataset);
}

/**
 ** Build a dataset containing a single table (named "x") whose fragments
 ** are the files in <files_list>, instead of reading a DATASET file.
 **
 ** Every entry of <files_list> must be stat()-able and must not be a
 ** directory; this is checked before anything is allocated so that a bad
 ** entry does not leak a half-built dataset.
 **
 ** Returns the new dataset, or NULL on failure (bad file, out of memory,
 ** or FakeTable() rejecting the files).
 **
 ** NOTE: if FakeTable() fails the dataset allocated here is not released;
 ** no routine for freeing a DATASET and its lists is visible in this file.
 **/
DATASET *
FakeDataset(LIST *files_list)
{
    char *tbl_name = "x";
    struct stat sbuf;
    DATASET *dataset;
    char **files;
    int n, i;

    files = (char **)list_data(files_list);
    n = list_count(files_list);

    for (i = 0; i < n; i++) {
        if (stat(files[i], &sbuf) != 0) {
            /* report the file that actually failed, not the first one */
            perror(files[i]);
            return NULL;
        }
        if ((sbuf.st_mode & S_IFDIR) != 0) {
            fprintf(stderr, "%s is a directory - it should be a file instead.\n", files[i]);
            return NULL;
        }
    }

    dataset = calloc(1, sizeof(DATASET));
    if (dataset == NULL) {
        return NULL;
    }
    dataset->tables = new_list();
    dataset->tablenames = new_list();

#ifdef LOGGING
    /* invocation_log_dataset(path); */
#endif /* LOGGING */

    if (FakeTable(dataset, files_list, tbl_name)) {
        return NULL;
    }

    return (dataset);
}

/**
 ** Create a table called <name> from an explicit list of files and add
 ** it to <dataset>.
 **
 ** An empty or NULL <files> list is not an error: no table is created
 ** and 0 is returned.  Otherwise the files are sorted, the label of the
 ** first file is loaded, and only then is the table registered in the
 ** dataset, so a rejected label never leaves a freed table (or an orphan
 ** table name) behind in the dataset's lists.
 **
 ** Returns 0 on success, 1 on failure (out of memory or LoadLabel()
 ** returning NULL).
 **/
int
FakeTable(DATASET *dataset, LIST *files, char *name)
{
    TABLE *table;

    if (files == NULL || files->number == 0) {
        /**
         ** No fragments found for the table, so there is nothing
         ** to create. It is a liability to keep an empty table around.
         **/
        return (0);
    }

    table = calloc(1, sizeof(TABLE));
    if (table == NULL) {
        return (1);
    }
    table->files = files;
    SortFiles(table->files);

    /* Load the label before publishing the table in the dataset */
    table->label = LoadLabel((table->files->ptr)[0]);
    if (table->label == NULL) {
        free(table);        /* this should be FreeTable */
        return (1);
    }
    table->label->table = table;

    list_add(dataset->tables, table);
    list_add(dataset->tablenames, (void *)strdup(name));

    return (0); /* return ok */
}


/*
** rtrim()
**
** Strips, in place, every trailing character of <str> that occurs in
** <trim_chars>.  Stops at the first character (scanning from the right)
** that is not in the set.  A NULL <str> is left alone.
*/
void
rtrim(char *str, char *trim_chars)
{
	size_t len;

	if (str == NULL){
		return;
	}

	for (len = strlen(str); len > 0; len--){
		if (strchr(trim_chars, str[len - 1]) == NULL){
			break;
		}
		str[len - 1] = '\0';
	}
}


/*
** FindTable()
**
** Looks up the table registered under <name> in <dataset>.  The
** tablenames and tables lists are treated as parallel arrays: the entry
** at a given index in tablenames names the table at the same index in
** tables.  Names are compared with strcmp(), i.e. case-sensitively.
**
** Returns the matching TABLE, or NULL if no name matches.
*/
TABLE *
FindTable(DATASET *dataset, char *name)
{
    int idx;

    for (idx = 0; idx < dataset->tables->number; ++idx) {
        char **names = (char **)dataset->tablenames->ptr;

        if (strcmp(names[idx], name) != 0) {
            continue;
        }
        return (((TABLE **)dataset->tables->ptr)[idx]);
    }

    return (NULL);
}

/**
 ** Search the dataset to see if this table already exists in it.
 ** If not, make a new TABLE
 ** Add the file entries from <path> and sort.
 **
 ** Parameters:
 **     dataset     dataset to search / add to
 **     path        directory handed to LoadFilenames()
 **     name        table name
 **
 ** Returns 0 on success (including the case where no files were found
 ** and therefore no table was created), 1 on failure (out of memory, or
 ** LoadLabel() rejecting the first file of a new table).
 **
 ** A new table is only added to the dataset's lists after its label has
 ** been loaded successfully, so a rejected label never leaves a freed
 ** table (or an orphan table name) behind in the dataset.
 **
 ** NOTE: when LoadFilenames() returns an empty, non-NULL list, or when
 ** the table cannot be created, that list is not released; no routine
 ** for freeing a LIST is visible in this file.
 **
 ** Wed Jul 12 00:16:55 MST 2000
 **     SortFiles was moved out of LoadFilenames and into here
 **
 ** Thu May 25 15:47:25 MST 2000
 **     Added check on LoadLabel return 
 **     (file can be rejected resulting in a NULL label
 **
 ** Thu May 25 15:46:38 MST 2000
 **     Changed function from void to int for error checking purposes
 **/

int
LoadTable(DATASET *dataset, char *path, char *name)
{
    TABLE *table = FindTable(dataset, name);
    LIST *files = LoadFilenames(path, name);

    if (table != NULL) {
        /*
        ** Known table: fold the new fragments into it.  The new-table
        ** path below treats a NULL list as "nothing found", so do the
        ** same here rather than handing NULL to list_merge().
        */
        if (files != NULL) {
            list_merge(table->files, files);
            SortFiles(table->files);
        }

        /* Check for duplicate file names? */
        return (0);
    }

    if (files == NULL || files->number == 0) {
        /**
         ** No fragments found for the table, so do not create a
         ** table structure. It is a liability to keep it around.
         **/
        return (0);
    }

    table = calloc(1, sizeof(TABLE));
    if (table == NULL) {
        return (1);
    }
    table->files = files;
    SortFiles(table->files);

    /* Load the label before publishing the table in the dataset */
    table->label = LoadLabel((table->files->ptr)[0]);
    if (table->label == NULL) {
        free(table);        /* this should be FreeTable */
        return (1);
    }
    table->label->table = table;

    list_add(dataset->tables, table);
    list_add(dataset->tablenames, (void *)strdup(name));

    return (0);
}
