scraper.pl 1006 B

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. use strict;
  2. use warnings;
  3. use Database::DumpTruck;
  4. use HTML::TreeBuilder 4;
  5. use LWP::Simple;
  # Storage handle shared by the whole script: rows are inserted into table
  # 'swdata' of the SQLite file 'data.sqlite'. Direct method-call syntax is
  # used instead of the indirect-object form ("new Class (...)"), which Perl
  # can mis-parse.
  my $dt = Database::DumpTruck->new({ dbname => 'data.sqlite', table => 'swdata' });
  # Fetch one page of the petition's signature list and store its rows.
  #
  # Parameters:
  #   $id - value appended after "start/" in the page URL.
  #
  # Returns the first-column text of the last data row found on the page
  # (the caller feeds it back in to request the next page), or undef when the
  # page contains no data rows.
  #
  # Dies with a message naming the URL if the page cannot be downloaded or if
  # it has no <table id="signatures">.
  sub do_page {
      my ($id) = @_;

      my $url = "http://www.peticie.com/signatures/gothoom_2014_otvoreny_list_ministrovi_kultury_prezidentovi_pz/start/$id";

      # LWP::Simple::get returns undef on any failure; check it here rather
      # than letting an undefined document surface later as an unrelated
      # "can't call method on undefined value" error.
      my $html = get($url);
      die "Failed to fetch $url" unless defined $html;

      my $tree  = HTML::TreeBuilder->new_from_content($html);
      my $table = $tree->look_down(_tag => 'table', id => 'signatures');
      unless ($table) {
          # Release the parse tree before bailing out.
          $tree->delete;
          die "No signatures table found at $url";
      }

      my $last_id;
      foreach my $row ($table->look_down(_tag => 'tr')) {
          my @cells = map { $_->as_text } $row->look_down(_tag => 'td');

          # Strip leading whitespace from every cell.
          s/^\s*// foreach @cells;

          # Skip header: rows without any <td> cells carry no data.
          next unless @cells;

          # Remember the id even for rows rejected below, so paging still
          # advances past them.
          $last_id = $cells[0];

          # Bogus entry: too few columns to be a real signature.
          next unless @cells > 2;

          $dt->insert({
              Id       => $cells[0],
              Name     => $cells[1],
              Location => $cells[2],
              Date     => $cells[3],
          });
      }

      $tree->delete;
      return $last_id;
  }
  # Driver: walk the signature pages one after another.
  #
  # Start from the 'last_id' variable saved by an earlier run. The lookup is
  # wrapped in eval so that a failing get_var (presumably a first run with
  # nothing stored yet -- confirm against Database::DumpTruck) falls back
  # to 0, as does an empty or zero stored value.
  my $id = eval { $dt->get_var('last_id') } || 0;

  while (1) {
      my $next_id = do_page($id);

      # undef means the page had no data rows: nothing more to fetch.
      last unless defined $next_id;

      # Persist progress after every page so an interrupted run can resume.
      $dt->save_var('last_id', $next_id);

      # Guard against spinning forever: if the page handed back the same id
      # we asked for, requesting it again would fetch the same URL.
      last if $next_id eq $id;

      $id = $next_id;
  }