#!/usr/bin/perl

# Display game skill metrics for games ordered by standard deviation
# for players with 20+ matches for games for which there are at least
# 20 such players.
# Bruno Wolff III
# Last revised October 13, 2012

# While areaid and gameid can't currently contain any uri specials, I want
# assume they can here, so I if tweak the database schema, things don't
# break here. The problem characters that might eventually be allowed
# are '#', '/' and '?'.

# Use unbuffered output as there are some long delays while doing queries

select(STDOUT);
$| = 1;

$" = '';

use Pg;
use Statistics::Distributions;

# Use this to clean stuff extracted from the database for html output
sub clean(@) {
  local $str = "@_";
  $str =~ s/&/&amp;/g;
  $str =~ s/</&lt;/g;
  $str =~ s/>/&gt;/g;
  $str =~ s/"/&quot;/g;
  return $str;
}

# Use this to convert REQUEST_URI to unescaped string
sub urldecode(@) {
  local $str = "@_";
  $str =~ s/\%[0-9a-fA-F]{2}/$urldhash{$&}/eg;
  return $str;
}

# Use this to make sure urls don't contain url specials
sub urlencode(@) {
  local $str = "@_";
  $str =~ s/[^-_A-Za-z0-9.$+!*'(),]/$urlehash{$&}/eg;
  return $str;
}

# One time build of data used by url decode
sub urlinit() {
  my $c;
  my $f;
  my $i;
  for ($i=0; $i<=255; $i++) {
    $c = chr($i);
    $f = sprintf('%%%.2X', $i);
    $urldhash{$f} = $c;
    $urldhash{lc($f)} = $c;
    $urlehash{$c} = $f;
  }
}
urlinit;

if ($ENV{REQUEST_METHOD} ne '' && $ENV{REQUEST_METHOD} ne 'GET' &&
  $ENV{REQUEST_METHOD} ne 'HEAD') {
  $meth = clean($ENV{REQUEST_METHOD});
  $uri = clean($ENV{REQUEST_URI});
  print <<EOF;
content-type: text/html; charset=UTF-8
status: 501 Method not implemented

<html><head>
<title>501 Method Not Implemented</title>
</head><body>
<h1>Method Not Implemented</h1>
Method '$meth' not implented for $uri.
</body></html>
EOF
  exit;
}

print <<"EOF";
content-type: text/html; charset=UTF-8

<html>
<head>
<title>Game skill metrics ordered by standard deviation for players with 20+ matches for games with 20+ such players</title>
</head>
<body>
EOF
{
    local *INPUT;
    if (open(INPUT, "links.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
print << "EOF";
<h1><a name="_">Game skill metrics ordered by standard deviation for players with 20+ matches for games with 20+ such players</a></h1>
EOF

# First try to connect
$conn = Pg::connectdb('dbname=area');
if ($conn->status != PGRES_CONNECTION_OK) {
  print << "EOF";
Unable to connect the AREA database.
EOF
{
    local *INPUT;
    if (open(INPUT, "links.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
{
    local *INPUT;
    if (open(INPUT, "sig.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
print << "EOF";
</body>
</html>
EOF
  exit;
}

print << "EOF";
<p>
The standard deviation of AREA ratings gives an indication of the relative
skill levels of two games. The game with more skill will have a higher
standard deviation. For the standard deviation to be useful in this
respect, a game should have a number of players who are rated and have
played enough games for their ratings to be relatively accurate.
<p>
The 90% low and high values bound the actual standard deviations for
which the null hypothesis that the actual standard deviation is the
actual standard deviation would not be rejected at the .1
significance level.
<p>
EOF

# Since there is only one query no transaction isolation setting is needed

$result = $conn->exec("select games.gameid, title, stddev(rate)::bigint, count(rate) from games, crate where games.gameid = crate.gameid and frq >= 20 group by games.gameid, title having count(rate) >= 20 order by stddev(rate) desc, lower(title), games.gameid");
if ($result->resultStatus != PGRES_TUPLES_OK) {
  print << "EOF";
Unable to access the games or crate tables.
EOF
{
    local *INPUT;
    if (open(INPUT, "links.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
{
    local *INPUT;
    if (open(INPUT, "sig.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
print << "EOF";
</body>
</html>
EOF
  exit;
}
if ($result->ntuples <= 0) {
  print << "EOF";
No games were found.
EOF
{
    local *INPUT;
    if (open(INPUT, "links.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
{
    local *INPUT;
    if (open(INPUT, "sig.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
print << "EOF";
</body>
</html>
EOF
  exit;
}

print "<table><col align=\"right\" span=\"4\"><col>\n<thead><tr><th>Standard Deviation for 20+ matches<th>90% Low<th>90% High<th>Number of people with 20+ matches<th>Title<tbody>\n";
while (@row = $result->fetchrow) {
  $code = clean(urlencode($row[0]));
  $title = clean($row[1]);
  $row[2] = 'Undefined' unless defined $row[2];
  $stddev = clean($row[2]);
  $count = clean($row[3]);
  $low = 'Undefined';
  $high = 'Undefined';
  if ($count > 1) {
    $low = int(.5 + $stddev * sqrt(($count-1)/
      Statistics::Distributions::chisqrdistr($count-1,.05)));
    $high = int(.5 + $stddev * sqrt(($count-1)/
      Statistics::Distributions::chisqrdistr($count-1,.95)));
  }
  $low = clean($low);
  $high = clean($high);
  print "<tr><td>$stddev<td>$low<td>$high<td>$count<td><a href=\"G_$code.html#_\">$title</a>\n";
}
print <<EOF;
</table>
EOF
{
    local *INPUT;
    if (open(INPUT, "links.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
{
    local *INPUT;
    if (open(INPUT, "sig.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
print << "EOF";
</body>
</html>
EOF
