#!/usr/bin/perl

# Display game skill metrics for games ordered by standard deviation.
# Bruno Wolff III
# Last revised October 13, 2012

# While areaid and gameid can't currently contain any uri specials, I want
# assume they can here, so I if tweak the database schema, things don't
# break here. The problem characters that might eventually be allowed
# are '#', '/' and '?'.

# Use unbuffered output as there are some long delays while doing queries

select(STDOUT);
$| = 1;

$" = '';

use Pg;
use Statistics::Distributions;

# Use this to clean stuff extracted from the database for html output
sub clean(@) {
  local $str = "@_";
  $str =~ s/&/&amp;/g;
  $str =~ s/</&lt;/g;
  $str =~ s/>/&gt;/g;
  $str =~ s/"/&quot;/g;
  return $str;
}

# Use this to convert REQUEST_URI to unescaped string
sub urldecode(@) {
  local $str = "@_";
  $str =~ s/\%[0-9a-fA-F]{2}/$urldhash{$&}/eg;
  return $str;
}

# Use this to make sure urls don't contain url specials
sub urlencode(@) {
  local $str = "@_";
  $str =~ s/[^-_A-Za-z0-9.$+!*'(),]/$urlehash{$&}/eg;
  return $str;
}

# One time build of data used by url decode
sub urlinit() {
  my $c;
  my $f;
  my $i;
  for ($i=0; $i<=255; $i++) {
    $c = chr($i);
    $f = sprintf('%%%.2X', $i);
    $urldhash{$f} = $c;
    $urldhash{lc($f)} = $c;
    $urlehash{$c} = $f;
  }
}
urlinit;

if ($ENV{REQUEST_METHOD} ne '' && $ENV{REQUEST_METHOD} ne 'GET' &&
  $ENV{REQUEST_METHOD} ne 'HEAD') {
  $meth = clean($ENV{REQUEST_METHOD});
  $uri = clean($ENV{REQUEST_URI});
  print <<EOF;
content-type: text/html; charset=UTF-8
status: 501 Method not implemented

<html><head>
<title>501 Method Not Implemented</title>
</head><body>
<h1>Method Not Implemented</h1>
Method '$meth' not implented for $uri.
</body></html>
EOF
  exit;
}

print <<"EOF";
content-type: text/html; charset=UTF-8

<html>
<head>
<title>Game skill metrics ordered by standard deviation</title>
</head>
<body>
EOF
{
    local *INPUT;
    if (open(INPUT, "links.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
print << "EOF";
<h1><a name="_">Game skill metrics ordered by standard deviation</a></h1>
EOF

# First try to connect
$conn = Pg::connectdb('dbname=area');
if ($conn->status != PGRES_CONNECTION_OK) {
  print << "EOF";
Unable to connect the AREA database.
EOF
{
    local *INPUT;
    if (open(INPUT, "links.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
{
    local *INPUT;
    if (open(INPUT, "sig.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
print << "EOF";
</body>
</html>
EOF
  exit;
}

print << "EOF";
<p>
The standard deviation of AREA ratings gives an indication of the relative
skill levels of two games. The game with more skill will have a higher
standard deviation. For the standard deviation to be useful in this
respect, a game should have a number of players who are rated and have
played enough games for their ratings to be relatively accurate.
<p>
The 90% low and high values bound the actual standard deviations for
which the null hypothesis that the actual standard deviation is the
actual standard deviation would not be rejected at the .1
significance level.
<p>
The mean should be close to 5000, but because ratings for players are
normally dropped after 12 (previously 10) years, the mean can be different.
My feeling is that means greater than 5000 should be normal as players who
try a game and don't like it will typically give up before they get very
good in the game. A mean less than 5000 probably indicates that very good
players get bored with the game.
<p>
EOF

# Since there is only one query no transaction isolation setting is needed

$result = $conn->exec("select games.gameid, title, case when count(rate) > 1 then stddev(rate)::bigint end, avg(rate)::bigint, count(rate), case when count(case when frq >= 10 then rate end) > 1 then stddev(case when frq >= 10 then rate end)::bigint end, count(case when frq >= 10 then rate end) from games left join crate on (games.gameid = crate.gameid and frq > 0) group by games.gameid, title order by stddev(rate) is null, stddev(rate) desc, lower(title), games.gameid");
if ($result->resultStatus != PGRES_TUPLES_OK) {
  print << "EOF";
Unable to access the games or crate tables.
EOF
{
    local *INPUT;
    if (open(INPUT, "links.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
{
    local *INPUT;
    if (open(INPUT, "sig.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
print << "EOF";
</body>
</html>
EOF
  exit;
}
if ($result->ntuples <= 0) {
  print << "EOF";
No games were found.
EOF
{
    local *INPUT;
    if (open(INPUT, "links.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
{
    local *INPUT;
    if (open(INPUT, "sig.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
print << "EOF";
</body>
</html>
EOF
  exit;
}

print "<table><col align=\"right\" span=\"9\"><col>\n<thead><tr><th>Standard Deviation for 10+ matches<th>90% Low<th>90% High<th>Number of people with 10+ matches<th>Standard Deviation (Skill)<th>90% Low <th>90% High<th>Mean<th>Number of ratings<th>Title<tbody>\n";
while (@row = $result->fetchrow) {
  $code = clean(urlencode($row[0]));
  $title = clean($row[1]);
  $row[2] = 'Undefined' unless defined $row[2];
  $stddev = clean($row[2]);
  $row[3] = 'Undefined' unless defined $row[3];
  $mean = clean($row[3]);
  $count = clean($row[4]);
  $row[5] = 'Undefined' unless defined $row[5];
  $stddev10 = clean($row[5]);
  $count10 = clean($row[6]);
  $low = 'Undefined';
  $high = 'Undefined';
  if ($count > 1) {
    $low = int(.5 + $stddev * sqrt(($count-1)/
      Statistics::Distributions::chisqrdistr($count-1,.05)));
    $high = int(.5 + $stddev * sqrt(($count-1)/
      Statistics::Distributions::chisqrdistr($count-1,.95)));
  }
  $low = clean($low);
  $high = clean($high);
  $low10 = 'Undefined';
  $high10 = 'Undefined';
  if ($count10 > 1) {
    $low10 = int(.5 + $stddev10 * sqrt(($count10-1)/
      Statistics::Distributions::chisqrdistr($count10-1,.05)));
    $high10 = int(.5 + $stddev10 * sqrt(($count10-1)/
      Statistics::Distributions::chisqrdistr($count10-1,.95)));
  }
  $low10 = clean($low10);
  $high10 = clean($high10);
  print "<tr><td><a href=\"stddev10.html#$code\">$stddev10</a><td>$low10<td>$high10<td>$count10<td><a name=\"$code\">$stddev</a><td>$low<td>$high<td><a href=\"mean.html#$code\">$mean</a><td>$count<td><a href=\"G_$code.html#_\">$title</a>\n";
}
print <<EOF;
</table>
EOF
{
    local *INPUT;
    if (open(INPUT, "links.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
{
    local *INPUT;
    if (open(INPUT, "sig.html")) {
        print while (<INPUT>);
        close INPUT;
    }
} 
print << "EOF";
</body>
</html>
EOF
