#!/usr/bin/perl -w

###########################################################################
#
# import.pl --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################


# This program will import a number of files into a particular collection

package parallel_import;

BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugouts");

    if (defined $ENV{'GSDLEXTS'}) {
	my @extensions = split(/:/,$ENV{'GSDLEXTS'});
	foreach my $e (@extensions) {
	    my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";

	    unshift (@INC, "$ext_prefix/perllib");
	    unshift (@INC, "$ext_prefix/perllib/cpan");
	    unshift (@INC, "$ext_prefix/perllib/plugins");
	    unshift (@INC, "$ext_prefix/perllib/plugouts");
	}
    }
    if (defined $ENV{'GSDL3EXTS'}) {
	my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
	foreach my $e (@extensions) {
	    my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";

	    unshift (@INC, "$ext_prefix/perllib");
	    unshift (@INC, "$ext_prefix/perllib/cpan");
	    unshift (@INC, "$ext_prefix/perllib/plugins");
	    unshift (@INC, "$ext_prefix/perllib/plugouts");
	}
    }
}

use strict;

use inexport;

my $oidtype_list = 
    [ { 'name' => "hash",
        'desc' => "{import.OIDtype.hash}" },
      { 'name' => "assigned",
        'desc' => "{import.OIDtype.assigned}" },
      { 'name' => "incremental",
        'desc' => "{import.OIDtype.incremental}" },
      { 'name' => "dirname",
        'desc' => "{import.OIDtype.dirname}" } ];


# used to control output file format
my $saveas_list = 
    [ { 'name' => "GreenstoneXML",
        'desc' => "{export.saveas.GreenstoneXML}"},
      { 'name' => "GreenstoneMETS",
        'desc' => "{export.saveas.GreenstoneMETS}"},
      ];


# Possible attributes for each argument
# name: The name of the argument
# desc: A description (or more likely a reference to a description) for this argument
# type: The type of control used to represent the argument. Options include: string, int, flag, regexp, metadata, language, enum etc
# reqd: Is this argument required?
# hiddengli: Is this argument hidden in GLI?
# modegli: The lowest detail mode this argument is visible at in GLI

my $saveas_argument
    = { 'name' => "saveas",
	'desc' => "{import.saveas}",
	'type' => "enum",
	'list' => $saveas_list,
	'deft' => "GreenstoneXML",
	'reqd' => "no",
	'modegli' => "3" };


my $arguments = 
    [ 
      $saveas_argument,
      { 'name' => "archivedir",
	'desc' => "{import.archivedir}",
	'type' => "string",
	'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "importdir",
	'desc' => "{import.importdir}",
	'type' => "string",
	'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "collectdir",
	'desc' => "{import.collectdir}",
	'type' => "string",
	# parsearg left "" as default
	#'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"),
	'deft' => "",
	'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "site",
	'desc' => "{import.site}",
	'type' => "string",
	'deft' => "",
	'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "manifest",
	'desc' => "{import.manifest}",
	'type' => "string",
	'deft' => "",
	'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "debug",
	'desc' => "{import.debug}",
	'type' => "flag",
	'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "faillog",
	'desc' => "{import.faillog}",
	'type' => "string",
	# parsearg left "" as default
	#'deft' => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"),
	'deft' => "",
	'reqd' => "no",
        'modegli' => "3" },
      { 'name' => "incremental",
	'desc' => "{import.incremental}",
	'type' => "flag",
	'hiddengli' => "yes" },
      { 'name' => "keepold",
	'desc' => "{import.keepold}",
	'type' => "flag",
	'reqd' => "no",
	'hiddengli' => "yes" },
      { 'name' => "removeold",
	'desc' => "{import.removeold}",
	'type' => "flag",
	'reqd' => "no",
	'hiddengli' => "yes" },
      { 'name' => "language",
	'desc' => "{scripts.language}",
	'type' => "string",
	'reqd' => "no",
	'hiddengli' => "yes" },
      { 'name' => "maxdocs",
	'desc' => "{import.maxdocs}",
	'type' => "int",
	'reqd' => "no",
	# parsearg left "" as default
	#'deft' => "-1",
	'range' => "1,",
	'modegli' => "1" },
      # don't set the default to hash - want to allow this to come from
      # entry in collect.cfg but want to override it here 
      { 'name' => "OIDtype",
	'desc' => "{import.OIDtype}",
	'type' => "enum",
	'list' => $oidtype_list,
	# parsearg left "" as default
	#'deft' => "hash",
	'reqd' => "no",
	'modegli' => "2" },
      { 'name' => "OIDmetadata",
	'desc' => "{import.OIDmetadata}",
	'type' => "string",
	 #'type' => "metadata", #doesn't work properly in GLI
	# parsearg left "" as default
	#'deft' => "dc.Identifier",
	'reqd' => "no",
	'modegli' => "2" },
      { 'name' => "out",
	'desc' => "{import.out}",
	'type' => "string",
	'deft' => "STDERR",
	'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "sortmeta",
	'desc' => "{import.sortmeta}",
	'type' => "string",
	#'type' => "metadata", #doesn't work properly in GLI
	'reqd' => "no",
	'modegli' => "2" },
      { 'name' => "removeprefix",
	'desc' => "{BasClas.removeprefix}",
	'type' => "regexp",
	'deft' => "",
	'reqd' => "no", 
	'modegli' => "3" },
      { 'name' => "removesuffix",
	'desc' => "{BasClas.removesuffix}",
	'type' => "regexp",
	'deft' => "",
	'reqd' => "no",
	'modegli' => "3" }, 
      { 'name' => "groupsize",
	'desc' => "{import.groupsize}",
	'type' => "int",
	'deft' => "1",
	'reqd' => "no",
	'modegli' => "2" },
      { 'name' => "gzip",
	'desc' => "{import.gzip}",
	'type' => "flag",
	'reqd' => "no",
	'modegli' => "3" },
      { 'name' => "statsfile",
	'desc' => "{import.statsfile}",
	'type' => "string",
	'deft' => "STDERR",
	'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "verbosity",
	'desc' => "{import.verbosity}",
	'type' => "int",
	'range' => "0,",
	# parsearg left "" as default
	#'deft' => "2",
	'reqd' => "no",
	'modegli' => "3" },
      { 'name' => "gli",
	'desc' => "{scripts.gli}",
	'type' => "flag",
	'reqd' => "no",
	'hiddengli' => "yes" },
      { 'name' => "xml",
	'desc' => "{scripts.xml}",
	'type' => "flag",
	'reqd' => "no",
	'hiddengli' => "yes" },
# jobs and epoch added for parallel processing
# [hs, 1 july 2010]
      { 'name' => "epoch",
	'desc' => "{import.epoch}",
	'type' => "int",
	'range' => "1,",
	'deft' => "1",
	'reqd' => "no",
	'hiddengli' => "yes" },
      { 'name' => "jobs",
	'desc' => "{import.jobs}",
	'type' => "int",
	'range' => "1,",
	'deft' => "1",
	'reqd' => "no",
	'hiddengli' => "yes" }];

my $options = { 'name' => "import.pl",
		'desc' => "{import.desc}",
		'args' => $arguments };





sub main 
{
    my $inexport = new inexport("import",\@ARGV,$options);   

    my $collection = $inexport->get_collection();
    my ($config_filename,$collect_cfg) = $inexport->read_collection_cfg($collection,$options);    
    $inexport->set_collection_options($collect_cfg);
    
    my $pluginfo = $inexport->process_files($config_filename,$collect_cfg);

    $inexport->generate_statistics($pluginfo);
}

&main();