#!/usr/bin/perl

# Fetch a copy of the WBC's game list page to see which events have urls
# (fetch for those that do) and to get event titles.

# Bruno Wolff III
# Last updated July 22, 2006

use LWP::UserAgent;
use HTML::TokeParser;
use charnames ':full';

use strict;
use warnings;

# States of the table scanner.  The scanner ignores everything until the
# first <th> start tag, then treats each following <tr> as one event row
# whose first <td> holds the event code and whose second <td> holds the
# title (optionally wrapped in a link).
use constant {
  ST_HEADER   => 0,    # waiting for the first <th>
  ST_ROW_WAIT => 1,    # waiting for the next <tr>
  ST_ROW      => 2,    # inside a row, waiting for its first <td>
  ST_CODE     => 3,    # inside the code cell
  ST_GAP      => 4,    # code cell closed, waiting for the title <td>
  ST_TITLE    => 5,    # inside the title cell
};

# Write one "code<TAB>title<TAB>url" line to $fh, or complain on STDERR
# when the row has no code.
sub write_event {
  my ($fh, $code, $title, $url) = @_;
  if ($code ne '') {
    print {$fh} "$code\t$title\t$url\n";
  }
  else {
    print STDERR "Empty code in gamelist.htm\n";
  }
  return;
}

# Site root; the game list is fetched from here and site-relative links
# found in it are resolved against it.
my $base = 'http://www.boardgamers.org/';

my $ua  = LWP::UserAgent->new;
my $req = HTTP::Request->new(GET => $base . 'gamelist.htm');
my $res = $ua->request($req);

if (!$res->is_success) {
  print STDERR "Unable to fetch game list.\n";
  exit 1;    # signal failure to the caller instead of exiting with 0
}

my $event_fh;
if (!open($event_fh, '>', 'wbcevent.new')) {
  print STDERR "Unable to open wbcevent.new.\n";
  exit 1;
}

my $html = $res->content;
my $p    = HTML::TokeParser->new(\$html);

my $state = ST_HEADER;
my ($code, $title, $url) = ('', '', '');

while (my $token = $p->get_token) {
  # For 'S' and 'E' tokens element 1 is the tag name; for 'T' tokens it
  # is the text, which is only ever compared against tag names here.
  my ($type, $name) = @{$token}[0, 1];
  my $start_tr = $type eq 'S' && $name eq 'tr';
  my $start_td = $type eq 'S' && $name eq 'td';
  my $end_td   = $type eq 'E' && $name eq 'td';

  if ($state == ST_HEADER) {
    $state = ST_ROW_WAIT if $type eq 'S' && $name eq 'th';
  }
  elsif ($state == ST_ROW_WAIT) {
    if ($start_tr) {
      ($code, $title, $url) = ('', '', '');
      $state = ST_ROW;
    }
  }
  elsif ($state == ST_ROW) {
    # A repeated <tr> leaves us waiting for the first cell.
    $state = ST_CODE if $start_td;
  }
  elsif ($start_tr) {
    # States ST_CODE, ST_GAP and ST_TITLE: a new row starts while the
    # previous one is still pending, so flush what was collected.
    write_event($event_fh, $code, $title, $url);
    ($code, $title, $url) = ('', '', '');
    $state = ST_ROW;
  }
  elsif ($state == ST_CODE) {
    if ($start_td) {
      $state = ST_TITLE;
    }
    elsif ($end_td) {
      $state = ST_GAP;
    }
    elsif ($type eq 'T') {
      $p->unget_token($token);
      my $text = $p->get_trimmed_text;
      $text =~ s/\t/ /g;
      # Ignore whitespace-only text so that layout whitespace following
      # an inline tag cannot wipe out a code that was already read.
      $code = $text if $text ne '';
    }
  }
  elsif ($state == ST_GAP) {
    $state = ST_TITLE if $start_td;
  }
  else {    # ST_TITLE
    if ($start_td || $end_td) {
      # The title cell is finished: emit the row.  The fields are reset
      # when the next <tr> is seen.
      write_event($event_fh, $code, $title, $url);
      $state = ST_ROW_WAIT;
    }
    elsif ($type eq 'T') {
      $p->unget_token($token);
      my $text = $p->get_trimmed_text;
      $text =~ s/\N{HORIZONTAL ELLIPSIS}/.../g;
      $text =~ s/\t/ /g;
      if ($text ne '') {
        $title = $title eq '' ? $text : $title . ' ' . $text;
      }
    }
    elsif ($type eq 'S' && $name eq 'a') {
      my $href = $token->[2]{href};
      if (defined($href)) {
        if ($href =~ m{\Ahttps?://}i) {
          # Already absolute: prefixing the site root would corrupt it.
          $url = $href;
        }
        else {
          $href =~ s{^/+}{};
          $url = $base . $href;
        }
        $url =~ s/\t/ /g;
      }
    }
  }
}

# The input ended while a title cell was still open (closing tags are
# optional in HTML), so the last row has not been written yet.
if ($state == ST_TITLE) {
  write_event($event_fh, $code, $title, $url);
}

# Buffered write errors only surface at close time.
if (!close($event_fh)) {
  print STDERR "Unable to write wbcevent.new.\n";
  exit 1;
}
