# Package neystadt::http_rtr
# Version 1.0
# Part of "WWW Cyrillic Encoding Suite"
# Get docs and newest version from
#	http://www.neystadt.org/cyrillic/
#
# Copyright (c) 1997-98, John Neystadt <http://www.neystadt.org/john/>
# You may install this script on your web site for free
# To obtain permision for redistribution or any other usage
#	contact john@neystadt.org.
#
# Drop me a line if you deploy this script on tyour site.

package neystadt::http_rtr;

1;

%GoodMimeType = ('text/html' => 1, 'text/plain' => 1);
@_BadExts = ('gif' , 'jpg', 'jpeg', 'ps', 'gz', 'zip');
for (@_BadExts) {$BadExts {$_} = 1;}
$AF_INET = 2;
$SOCK_STREAM = 1;

sub Http_Retrieve {

repeat:
	my ($AbsUri, $host, $port, $url) = ParseUrl (@_[0]);
	# ermsg("a parse @_[0] $host !");
	exit 1 if !$host;

	$url =~ m|\.([^/\.]*)$|o;
	# if ($BadExts {lc ($1)}) {
	# 	ermsg("File extension ($1) is not good, skipping...\n"); 
	# 	exit 1;
	# }

	exit 1 if !ConnectUrl (Sock, $host, $port, $url);
	($HttpVer, $HttpStatus, $Hdrs, @_[2]) = MakeHttpRequest (Sock, $url, $host);

	if ($HttpStatus =~ /^2..$/o) {	# Ok
	#	if ($GoodMimeType { lc (${$Hdrs}{'content-type'})}) {
			@_[1]=GetHttpBody (Sock); #ermsg("body @_[1]");
	#	} else {
	#		ermsg("MIME type: ${$Hdrs}{'content-type'}, skipping...\n");
	#	}
	} elsif ($HttpStatus == 301 or $HttpStatus == 302) {	# Redirection
	 	@_[0]=${$Hdrs}{'location'}; goto repeat;
	} else {			# Error
		ermsg("Failed to retrieve! - HTTP Error $HttpStatus\n"); exit;
	}

	close (Sock);
	#ermsg("@_[2]\n@_[1]");
}

# *****

sub ConnectUrl {
	local (*S) = shift;
	my ($host, $port, $url) = @_;


	$sockaddr = 'S n a4 x8';

	($name,$aliases,$proto) = getprotobyname('tcp');
	($name,$aliases,$port) = getservbyname($port,'tcp')
	    unless $port =~ /^\d+$/;;
	($name,$aliases,$type,$len,$thataddr) = gethostbyname($host);
	if (!$name) {
		ermsg("Failed to resolve address \"$host\".\n");
		return 0;
	}

	$this = pack($sockaddr, $AF_INET, 0, $thisaddr);
	$that = pack($sockaddr, $AF_INET, $port, $thataddr);

	if (!socket(S, $AF_INET, $SOCK_STREAM, $proto)) { 
	   	ermsg("Can't create socket - $!\n");
		return 0;
	}

	if (connect(S,$that)) {
	} else {
	   	ermsg("Unable to connect.\n");
		return 0;
	}

	select(S); $| = 1; select(STDOUT);

	1;
}

# ******

sub MakeHttpRequest {
	local (*S) = shift (@_);
	my ($url, $host) = @_;
	my ($hdr_string)='';

	print S "GET $url HTTP/1.0
User-Agent: Code.pl/1.1
Accept: *.*
Host: $host
From: leonid\@neystadt.org\n\n";

	# Read Headers

	my (%Hdrs);
	my ($HttpVer, $HttpStatus) = ('0.9', '200');	
	my ($InHdrs) = 0;
	while (<S>) {
		$hdr_string .= $_ if $InHdrs;
		s/[\n\r]//go;

		if (!$InHdrs && m/^HTTP\/(\d\.\d)\s+(\d\d\d)/i) {
			($HttpVer, $HttpStatus) = ($1, $2);
			$InHdrs = 1;
			next;
		}

		$Hdrs { lc ($1) } = $2
			if m/^([^:]*)\:\s*(.*)$/oi;

		last if $_ eq '';
	}

	($HttpVer, $HttpStatus, \%Hdrs, $hdr_string);
}

#******

sub GetHttpBody {
	local (*S) = @_;

	my ($Body);
	while (<S>) {
		$Body .= $_;
	}
	$Body;	
}

#*******

sub ParseUrl {
	my ($AbsUri) = @_;

	if (!($AbsUri =~ m|^\s*http://\s*([^/]*)(.*)\s*$|io)) {
		return undef;
	}

	my ($host, $url, $port, $anchor) = ($1, $2, '', '');
	if ($host =~ m/^([^:]*):(.*)$/o) {	# split host & port
		($host, $port) = ($1, $2);
	}

	if ($url =~ m/^([^#]*)#(.*)$/o) {	# split url $ anchor
		($url, $anchor) = ($1, $2);
	}

	$host = lc ($host);
	$host =~ s|\.$||go;			# cut of . at end of host name

	$port = 80 unless $port;

	$url = '/' unless $url;
	$url =~ s|/[^/]*/\.\./|/|go;	# collapse /sss/../ constructions
	$url =~ s|^/\.\./|/|go;		# collpase ../xxx in the begining of url
	$url =~ s|/\./|/|go;		# collapse /./ constructions
	$url =~ s|/\.$|/|go;		# collapse /. at url end
	$url =~ s|//|/|go;		# collpase double slashes (//)

	if ($port eq 80) {
		$AbsUri = "http://$host$url";
	} else {
		$AbsUri = "http://$host:$port$url";
	}

	$AbsUri .= "\#$anchor" if $anchor;

	($AbsUri, $host, $port, $url, $anchor);
}

sub ermsg {
	if (!$sw) {$sw=1; print "Content-type: text/plain\n\n";}
	print "@_[0]\n";
}