# Package encoding::ChkCode
# Version 1.0
# Part of "WWW Cyrillic Encoding Suite"
# Get docs and newest version from
#	http://www.neystadt.org/cyrillic/
#
# Copyright (c) 1997-98, John Neystadt <http://www.neystadt.org/john/>
# You may install this script on your web site for free
# To obtain permission for redistribution or any other usage
#	contact john@neystadt.org.
#
# Drop me a line if you deploy this script on your site.

package encoding::ChkCode;

use encoding::trans;
use encoding::StatKoi;
use encoding::StatWin;

# Module settings (package variables read by the subs below).

# Length, in characters, of the substrings that GetCodeScore looks up in
# the statistics tables.
$PairSize = 2;

# Original intent: one encoding's mark had to be $MinRatio times larger
# than the other's to decide upon it, otherwise the text was reported as
# 'ENG'.  That rule is commented out in GetEncoding; in the code as it
# stands $MinRatio is only the upper bound of the "doubtful" ratio band
# that triggers an entry in $DoubtLog.
$MinRatio = 1.5;

# Lower bound of the "doubtful" band: a ratio strictly between
# $DoubtRatio and $MinRatio causes the input to be logged.
$DoubtRatio = 1;

# File that doubtful inputs are appended to.  GetEncoding skips logging
# entirely when this is false.
# NOTE(review): if nothing but sub definitions follows, this assignment is
# also the file's last executed statement and therefore the value that
# `use'/`require' sees; setting it to '' or 0 here would then make loading
# the module fail -- confirm, or end the file with `1;'.
$DoubtLog = 'doubt.txt';

# GetEncoding (@Data)
#
# Guesses whether the given lines of text are KOI8 or Windows encoded.
#
# Each encoding is scored with GetCodeScore and the two scores are turned
# into ratios against each other.  The encoding with the strictly larger
# ratio wins; a tie (including "nothing scored at all") yields 'WIN'.
#
# Parameters:
#	@Data - the text to examine, as a list of strings.
# Returns:
#	'KOI8' or 'WIN'.
# Side effects:
#	- prints a diagnostic line with both scores to the currently
#	  selected output handle on every call;
#	- sets the package variables $KoiRatio and $WinRatio;
#	- when $DoubtLog is set and either ratio lies strictly between
#	  $DoubtRatio and $MinRatio, appends the scores and the input to
#	  the file named by $DoubtLog.
sub GetEncoding {
	my (@Data) = @_;
	my ($KoiMark) = GetCodeScore ('Koi', @Data);
	my ($WinMark) = GetCodeScore ('Win', @Data);

	# NOTE(review): this goes to the selected output handle (normally
	# STDOUT) on every call; it looks like leftover debugging output, but
	# it is kept because callers may rely on it -- confirm.
	print "GetEncoding: Koi8 - $KoiMark, Win - $WinMark\n";

	# The +1 keeps the denominator away from zero when the other score
	# is 0 (presumably scores are never negative -- TODO confirm against
	# the statistics tables).
	# These stay package variables, as before, in case code outside this
	# file reads them after the call.
	$KoiRatio = $KoiMark/($WinMark+1);
	$WinRatio = $WinMark/($KoiMark+1);

	if ($DoubtLog) {
		if (($KoiRatio < $MinRatio && $KoiRatio > $DoubtRatio) ||
			($WinRatio < $MinRatio && $WinRatio > $DoubtRatio)) {
			# Logging is best effort: a log file that cannot be
			# written must not stop detection, but the failure is
			# reported instead of silently printing to an unopened
			# handle.  Three-argument open keeps the file name from
			# being interpreted as part of the open mode.
			if (open (my $Log, '>>', $DoubtLog)) {
				print {$Log} " Koi8 - $KoiMark, Win - $WinMark\n",
					join ("\n", @Data), "\n\n";
				close ($Log)
					or warn "GetEncoding: can't close $DoubtLog: $!\n";
			} else {
				warn "GetEncoding: can't append to $DoubtLog: $!\n";
			}
		}
	}

	# The $MinRatio threshold and the 'ENG' fallback are intentionally
	# left disabled, as in the original code.
	return 'KOI8' if $KoiRatio > $WinRatio;	# $MinRatio;
	return 'WIN'; 				# if $WinRatio > $MinRatio;
#	return 'ENG';
}

# GetCodeScore ($Code, @Data)
#
# Scores how well the text matches the statistics table of one encoding.
#
# Every string is stripped of CR/LF, lower-cased with
# Encoding::Trans::LCase, and split into words on runs of punctuation,
# whitespace, digits and angle brackets.  For each word, every substring
# of $PairSize characters is looked up in the table and the values found
# are summed.
#
# Parameters:
#	$Code - 'Koi' or 'Win'; selects the statistics table.
#	@Data - the text to score, as a list of strings (the caller's
#	        strings are not modified; a copy is processed).
# Returns:
#	The numeric score; 0 when nothing could be scored (no data, or no
#	word as long as $PairSize).
# Dies:
#	With "Don't know $Code!" for any other $Code.
sub GetCodeScore {
	my ($Code, @Data) = @_;

	# NOTE(review): the tables and LCase are addressed through
	# "Encoding::..." (capital E) although this file loads
	# "encoding::..." modules -- confirm that these are the package
	# names those modules actually declare.
	my $Table;
	if ($Code eq 'Koi') {
		$Table = \%Encoding::StatKoi::StatsTableKoi;
	} elsif ($Code eq 'Win') {
		$Table = \%Encoding::StatWin::StatsTableWin;
	} else {
		die "Don't know $Code!\n";
	}

	# Start from 0 so callers always get a number back.
	my $Mark = 0;

	# The outer loop deliberately iterates with the (localised) $_, so
	# that anything the helper does to $_ cannot leak to our caller.
	for (@Data) {
		tr/\n\r//d;
		$_ = Encoding::Trans::LCase ($_, $Code);
		for my $Word (split (/[\.\,\-\s\:\;\?\!\'\"\(\)\d<>]+/, $_)) {
			# Words shorter than $PairSize give an empty range here
			# and therefore contribute nothing.
			for my $i (0 .. length ($Word) - $PairSize) {
				# Pairs missing from the table count as 0.
				$Mark += $Table->{substr ($Word, $i, $PairSize)} || 0;
			}
		}
	}

	return $Mark;
}
