#!/usr/bin/perl
#------------------------------------------------------------------------------
# Copyright (C) 2001 Thomax Kaulmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

use lib '../config';
use lib '../oma-bin';
use CGI;
use DBI;
use OMA;
use XML::Parser;
use HTTP::Date qw(str2time time2isoz);
use Unicode::String qw(utf8 latin1 utf16);
use Net::HTTP;
use WWW::Curl::easy;
use Encode;

# Package globals shared between the curl callbacks, the XML::Parser stream
# handlers and the main harvesting loop.  They are declared with 'our'
# (not 'local'): at file scope 'local' only saves/restores a value, it does
# not declare anything.
our $body;         # response body, appended to by body_callback()
our $n = 0;        # index of the @res slot currently being filled
our @res;          # one hash per header: {identifier}, {setSpec};
                   # $res[0]{resumptionToken} carries the paging token
our %row;          # text of the dc:* elements of the current record
our %fieldattr;    # per dc:* element: [attribute name, attribute value]
our $OAIURL = "http://archive.b.lab.net/oma-bin/oma-oai";
my $curl;          # WWW::Curl handle, created by the main script below


# parsetag($xmltext, $op)
#
# Parse one OAI-PMH response with XML::Parser's 'Stream' style and store the
# interesting parts in package globals:
#
#   $op eq "ListIdentifiers"  fills $res[$n]{identifier} / $res[$n]{setSpec},
#                             advances $n at every </header>, and stores a
#                             resumption token in $res[0]{resumptionToken}
#   $op eq "GetRecord"        fills %row (element text keyed by dc:* name)
#                             and %fieldattr ([attribute name, value])
#
# The Stream style finds its handlers (StartDocument, StartTag, EndTag, Text)
# by name in this package, so those subs live at package level below and
# must keep exactly these names.
#
# Returns whatever $p->parse returns; XML::Parser dies on malformed input.
sub parsetag {
	my $xmltext = shift;

	# The handlers read $op, so it has to stay a dynamically scoped
	# package variable rather than a lexical.
	our $op;
	local $op = shift;

	# Normalise CRLF and bare CR to LF.  The previous split/join used a
	# capturing group, which returned the separators as list elements and
	# so turned every line break into three.
	$xmltext =~ s/\r\n?/\n/g;

	my $p = XML::Parser->new(Style => 'Stream', ProtocolEncoding => 'UTF-8');
	return $p->parse($xmltext);
}

# Stream handler: nothing to set up per document.
sub StartDocument {}

# Stream handler: start tags need no processing.  The sub is kept defined
# (although empty) because the Stream style prints markup for which it
# finds no handler.
sub StartTag {}

# Stream handler, called as EndTag($expat, $element_name).
# A closing </header> completes one ListIdentifiers entry, so the next
# identifier/setSpec pair goes into the next @res slot.
sub EndTag {
	my ($exp, $type) = @_;
	our ($op, $n);

	if ($op eq "ListIdentifiers" && $type eq "header") {
		$n++;
	}
}

# Stream handler, called as Text($expat) with the accumulated character
# data in $expat->{Text} and the element stack in $expat->{Context}.
# %_ still holds the attributes of the most recently opened element.
sub Text {
	my $exp = shift;
	our ($op, $n, @res, %row, %fieldattr);

	my %attr = %_;
	my $leaf = $exp->{Context}[-1];    # innermost enclosing element
	my $text = $exp->{Text};

	return unless defined $leaf;

	if ($op eq "ListIdentifiers") {
		if ($leaf eq "identifier") {
			$res[$n]{identifier} = $text;
		} elsif ($leaf eq "setSpec") {
			$res[$n]{setSpec} = $text;
		} elsif ($leaf eq "resumptionToken") {
			# Always kept in slot 0, where the main loop looks for it.
			$res[0]{resumptionToken} = $text;
		}
	} elsif ($op eq "GetRecord") {
		# Only Dublin Core elements are collected.
		return unless $leaf =~ /^dc:/;

		# Text is stored verbatim.  (The old code collapsed "\n\n" for
		# typed fields to partly undo the newline tripling in parsetag;
		# with that fixed, no compensation is needed.)  A repeated
		# element overwrites the value stored for the previous one.
		$row{$leaf} = $text;

		# Remember the one qualifying attribute, if present.  An entry
		# left by an earlier element of the same name is not cleared.
		if ($leaf =~ /^dc:(?:title|date|format|description|coverage)\z/
				&& $attr{type}) {
			$fieldattr{$leaf} = ['type', $attr{type}];
		} elsif ($leaf eq "dc:rights" && $attr{license}) {
			$fieldattr{$leaf} = ['license', $attr{license}];
		} elsif ($leaf =~ /^dc:(?:creator|contributor|relation)\z/
				&& $attr{role}) {
			$fieldattr{$leaf} = ['role', $attr{role}];
		}
	}
}


# curl header callback: response headers are not needed, so the chunk is
# discarded.  The chunk length is returned as the callback's result.
# A second argument (the handle) is passed by the caller and ignored.
sub header_callback {
	my $chunk = $_[0];
	return length $chunk;
}

# curl body callback: append the received chunk to the global $body and
# return the chunk length.  A second argument (the handle) is passed by the
# caller and ignored.
sub body_callback {
	my $chunk = shift;
	$body .= $chunk;
	return length $chunk;
}

# ee($text) - entity-encode a string for use as XML element content.
#
# Replaces & " > < by their entities and passes the result through utf8()
# (imported from Unicode::String).  Returns undef for undefined input.
sub ee
{
	my ($text) = @_;

	# Explicit undef (rather than a bare return) so the result is the same
	# single value in every calling context.
	return undef unless defined $text;

	my %entity = (
		'&' => '&amp;',
		'"' => '&quot;',
		'>' => '&gt;',
		'<' => '&lt;',
	);

	# One pass over the string: each special character is replaced exactly
	# once, so the '&' of an inserted entity is never escaped again.
	$text =~ s/([&"<>])/$entity{$1}/g;

	return utf8($text);
}


# Percent-encode a value for use inside a URL query string.
# Everything except RFC 3986 "unreserved" characters is escaped.  Character
# strings are converted to UTF-8 bytes first so that each byte is escaped.
# Returns '' for undef.
sub _oai_uri_escape {
	my $value = shift;

	return '' unless defined $value;
	$value = Encode::encode_utf8($value) if Encode::is_utf8($value);
	$value =~ s/([^A-Za-z0-9\-_.~])/sprintf("%%%02X", ord($1))/ge;
	return $value;
}

# Fetch $url with the shared curl handle.  The response is collected in the
# global $body by body_callback().  Dies when curl reports a non-zero result
# code instead of handing an empty or partial document to the XML parser.
sub _oai_fetch {
	my $url = shift;

	$body = "";
	$curl->setopt(CURLOPT_URL, $url);
	my $rc = $curl->perform;
	die "request for $url failed (curl code $rc)" if $rc;
	return $body;
}

$curl = WWW::Curl::easy->new();
$curl->setopt(CURLOPT_WRITEFUNCTION, \&body_callback);
$curl->setopt(CURLOPT_HEADERFUNCTION, \&header_callback);

# Phase 1: page through ListIdentifiers.  parsetag() appends the headers of
# every page to @res and leaves a resumption token, if the response carried
# one, in $res[0]{resumptionToken}.
my $token = "";
do {
	# length(), not truthiness: a token of "0" is still a token.
	my $query = length($token)
		? "verb=ListIdentifiers&resumptionToken=" . _oai_uri_escape($token)
		: "verb=ListIdentifiers&metadataPrefix=oai_dc";

	_oai_fetch("$OAIURL?$query");

	# Clear the old token before parsing so that a page without a token
	# terminates the loop.
	$res[0]{resumptionToken} = "";
	parsetag($body, "ListIdentifiers");
	$token = $res[0]{resumptionToken};
} while (length $token);


# Phase 2: fetch every record and print it as an <omadc> block.
for my $rec (@res) {
	# $res[0] exists even when no header was returned (it holds the
	# resumption token); skip slots that carry no identifier.
	next unless defined $rec->{identifier} && length $rec->{identifier};

	%row = ();
	%fieldattr = ();

	_oai_fetch("$OAIURL?verb=GetRecord&identifier="
			. _oai_uri_escape($rec->{identifier})
			. "&metadataPrefix=oai_dc");
	parsetag($body, "GetRecord");

	# The set the record was listed under becomes its dc:type, replacing
	# any dc:type element found in the record itself.
	$row{'dc:type'} = $rec->{setSpec};

	my $out = "<omadc>\n";
	# Sorted so that the output is stable from run to run.
	for my $k (sort keys %row) {
		my $attr = $fieldattr{$k};

		if (!$attr) {
			$out .= "<$k>". ee($row{$k}) ."</$k>\n";
		} elsif ($attr->[0] eq "type") {
			# The type value becomes part of the element name.
			$out .= "<$k:$attr->[1]>". ee($row{$k}) ."</$k:$attr->[1]>\n";
		} elsif ($attr->[0] eq "license" || $attr->[0] eq "role") {
			# Value and attribute are written as two sibling elements.
			$out .= "<$k>". ee($row{$k}) ."</$k>\n";
			$out .= "<$k:$attr->[0]>". ee($attr->[1]) ."</$k:$attr->[0]>\n";
		} else {
			$out .= "<$k:$attr->[0]>". ee($row{$k}) ."</$k:$attr->[0]>\n";
		}
	}
	$out .= "</omadc>\n\n\n\n\n";
	print $out;
}



