[新手上路]批处理新手入门导读[视频教程]批处理基础视频教程[视频教程]VBS基础视频教程[批处理精品]批处理版照片整理器
[批处理精品]纯批处理备份&还原驱动[批处理精品]CMD命令50条不能说的秘密[在线下载]第三方命令行工具[在线帮助]VBScript / JScript 在线参考
返回列表 发帖
  1. use Encode;
  2. use Modern::Perl;
  3. use File::Slurp;
  4. use Mojo::UserAgent;
  5. use File::Basename qw/basename/;
  6. use File::Path qw/mkpath/;
  # Driver: find out how many listing pages exist, then crawl each one.

  # Unbuffer STDOUT so progress lines appear as soon as they are printed.
  STDOUT->autoflush(1);

  # Shared state, declared with `our` because get_article() reaches these
  # as package globals.
  our $ua   = Mojo::UserAgent->new();
  our $main = "http://www.court.gov.cn";
  our $wdir = "F:/temp/gov_wenshu";
  mkpath $wdir unless -e $wdir;

  # Get the number of the last listing page and the URL prefix that all
  # listing pages share.
  my ($prefix, $maxpg) = get_max_pgcode( $main . "/wenshu.html" );

  # Without both values there is nothing to iterate over; stop loudly rather
  # than falling through to an empty "1 .. undef" loop.
  die "Cannot determine the listing page range from $main/wenshu.html\n"
      unless defined $prefix && defined $maxpg;

  for my $id ( 1 .. $maxpg ) {
      my $page_url = "${main}${prefix}$id.html";

      # print, not printf: the URL must never be interpreted as a format
      # string (a literal '%' in it would be treated as a directive).
      print "$page_url\n";
      get_article( $page_url );
  }
  # Download every article linked from one listing page.
  #
  # Arguments:
  #   $link - URL of a listing page.
  #
  # Each anchor matching ".list .l li a" is fetched from $main . href and
  # stored as $wdir/<basename of href>. Files that already exist are skipped,
  # so an interrupted run can be restarted. Responses with a non-2xx status
  # are reported on STDERR and NOT written, so that a later run retries them
  # instead of treating a saved error page as a finished download.
  #
  # Returns nothing. Connection-level failures still propagate as exceptions
  # from Mojo's result().
  sub get_article
  {
      our ($ua, $main, $wdir);
      my ( $link ) = @_;

      my $page = $ua->get( $link )->result;
      unless ( $page->is_success ) {
          warn sprintf( "Skipping listing %s: HTTP %s\n", $link, $page->code // '?' );
          return;
      }

      for my $e ( $page->dom->find(".list .l li a")->each )
      {
          # An <a> without an href attribute yields undef; nothing to fetch.
          my $href = $e->attr("href");
          next unless defined $href && length $href;

          my $name = basename($href);
          printf "%s\n", $name;

          my $fpath = $wdir . "/" . $name;
          next if -e $fpath;

          my $res = $ua->get( $main . $href )->result;
          unless ( $res->is_success ) {
              warn sprintf( "Skipping %s: HTTP %s\n", $href, $res->code // '?' );
              next;
          }

          # body() is raw bytes; write them in binary mode so no newline
          # translation alters the downloaded content.
          write_file( $fpath, { binmode => ':raw' }, $res->body );
      }

      return;
  }
  # Work out the listing-page URL prefix and the number of the last page.
  #
  # Arguments:
  #   $link - URL of a page that contains the pager.
  #
  # The href of the ".yiiPager .last a" element is expected to look like
  # "<prefix><number>.html".
  #
  # Returns ($prefix, $number) on success. On failure (pager element missing,
  # no href, or href not in the expected form) a message is written to STDERR
  # and an empty list is returned, so "my ($p, $n) = get_max_pgcode(...)"
  # leaves both variables undefined.
  sub get_max_pgcode
  {
      our $ua;
      my ( $link ) = @_;

      my $res = $ua->get( $link )->result;

      # at() returns undef when nothing matches; do not call attr() on it.
      my $last = $res->dom->at(".yiiPager .last a");
      my $href = defined $last ? $last->attr("href") : undef;

      if ( defined $href && $href =~ m{^(.*/)(\d+)\.html} ) {
          return ($1, $2);
      }

      warn "Failed to get max page code\n";
      return;
  }
复制代码

TOP

返回列表