mixi
ユーザのプロフィール情報をデータベースに格納するだけです。普通に文字化けとかします。
-- Profile data scraped from mixi by mixi.pl: one row per numeric user id.
-- mixi.pl first inserts the id alone and then fills in the other columns one
-- by one, so every column except id is nullable.
--
-- The table character set is declared explicitly. Without it the server
-- default is used, and Japanese text (including the enum labels below) can be
-- stored garbled when that default is not a Unicode character set.
-- NOTE(review): the client connection that runs this statement must also use
-- a UTF-8 character set for the enum labels to arrive intact -- confirm.
create table mixi (
    id         int not null primary key,  -- mixi user id (show_friend.pl?id=...)
    hn         varchar(21),               -- nickname shown on the profile page
    image      varchar(255),              -- src attribute of the profile photo
    name       varchar(42),               -- value of the profile row "名前"
    sex        enum('男性', '女性'),        -- value of the profile row "性別"
    address    varchar(20),               -- value of the profile row "現住所"
    age        varchar(3),                -- value of the profile row "年齢", stored as text
    birth      varchar(8),                -- value of the profile row "誕生日", stored as text
    blood      varchar(4),                -- value of the profile row "血液型" with "型" removed
    home       varchar(20),               -- value of the profile row "出身地"
    hobby      varchar(255),              -- value of the profile row "趣味"
    job        varchar(15),               -- value of the profile row "職業"
    department varchar(200),              -- value of the profile row "所属"
    introduce  text,                      -- value of the profile row "自己紹介"
    favorite   varchar(255)               -- values of the "好きな..." rows, concatenated
) default character set = utf8mb4;
mixi.pl
use strict;
use warnings;
use utf8;
use Web::Scraper;
use WWW::Mechanize;
use URI;
use DBI;
use Encode;

# mixi.pl -- log in to mixi, walk profile pages by numeric user id and store
# each profile in the `mixi` table.
#
# For every id the script inserts a row holding only the id, then updates one
# column per profile field it finds on the page. All scraped values reach the
# database through bound placeholders, never through string interpolation, so
# apostrophes or SQL fragments in a profile cannot break or alter a statement.
# A value that is absent from the page is stored as NULL.

# Highest user id to visit. The original wrote this bound as "6千万"
# (60 million), which is not a valid Perl number.
my $MAX_USER_ID = 60_000_000;

# Maximum number of profile table rows examined per user.
my $MAX_PROFILE_ROWS = 13;

# Profile row heading => column of the `mixi` table, tested in this order.
# Column names come only from this fixed list, which is what makes it safe to
# interpolate them into the SQL text in set_column().
my @PROFILE_COLUMNS = (
    [ qr/名前/     => 'name' ],
    [ qr/性別/     => 'sex' ],
    [ qr/現住所/   => 'address' ],
    [ qr/年齢/     => 'age' ],
    [ qr/誕生日/   => 'birth' ],
    [ qr/血液型/   => 'blood' ],
    [ qr/出身地/   => 'home' ],
    [ qr/趣味/     => 'hobby' ],
    [ qr/職業/     => 'job' ],
    [ qr/所属/     => 'department' ],
    [ qr/自己紹介/ => 'introduce' ],
    [ qr/好きな/   => 'favorite' ],
);

# Return the column name for a profile row heading, or undef if no rule matches.
sub column_for {
    my ($heading) = @_;
    for my $rule (@PROFILE_COLUMNS) {
        my ($pattern, $column) = @{$rule};
        return $column if $heading =~ $pattern;
    }
    return;
}

# Remove and return the next scraped value stored under $key, or undef when
# the scraper found nothing (or nothing more) for that key.
sub take_next {
    my ($res, $key) = @_;
    my $list = $res->{$key};
    return undef unless $list && @{$list};
    return shift @{$list};
}

# Set one column of the row for $id. $column must be one of the fixed column
# names used in this script; $value is bound as a placeholder (undef => NULL).
sub set_column {
    my ($dbh, $id, $column, $value) = @_;
    my $sth = $dbh->prepare_cached("update mixi set $column = ? where id = ?")
        or return;    # DBI has already reported the error (PrintError)
    $sth->execute($value, $id);
    return;
}

# mysql_enable_utf8 makes the driver exchange UTF-8 text with the server, so
# the character strings produced under "use utf8" are not stored garbled.
my $dbh = DBI->connect(
    'DBI:mysql:DATABASENAME', 'USER', 'PASSWD',
    { mysql_enable_utf8 => 1 },
) or die "cannot connect to the database: $DBI::errstr\n";

binmode STDIN,  ":encoding(utf8)";
binmode STDOUT, ":encoding(utf8)";

my $mech = WWW::Mechanize->new;
$mech->get('http://mixi.jp/');

# Each rule collects every match into a list: nickname, profile photo URL,
# profile table headings (th) and profile table values (td).
my $scraper = scraper {
    process '.name a',               'hn[]'       => 'TEXT';
    process '.photo img',            'images[]'   => '@src';
    process '.profileListTable th',  'category[]' => 'TEXT';
    process '.profileListTable td',  'text[]'     => 'TEXT';
};

# Log in through the form on the top page.
$mech->submit_form(
    fields => {
        email    => 'MAIL',
        password => 'PASSWD',
    },
);

for my $id (1 .. $MAX_USER_ID) {
    $mech->get("http://mixi.jp/show_friend.pl?id=$id");
    my $res = $scraper->scrape($mech->content);

    # Create the row first; the columns are filled in below.
    my $insert = $dbh->prepare_cached('insert into mixi(id) values (?)');
    $insert->execute($id) if $insert;

    # Nickname
    my $nickname = take_next($res, 'hn');
    set_column($dbh, $id, 'hn', $nickname);

    # Profile image
    my $image_url = take_next($res, 'images');
    set_column($dbh, $id, 'image', $image_url);

    my @favorites;
    for (1 .. $MAX_PROFILE_ROWS) {
        my $heading = take_next($res, 'category');

        # Deleted id, or every profile row has been read.
        last if !$heading;

        # NOTE(review): a heading that matches no rule does not consume a
        # value cell, exactly as before. If the page pairs every th with a
        # td, later fields would be shifted by one -- confirm against the
        # page markup.
        my $column = column_for($heading);
        next if !defined $column;

        my $value = take_next($res, 'text');

        if ($column eq 'favorite') {
            # There can be several "好きな..." rows; they share one column
            # and are written once after the loop.
            push @favorites, $value if defined $value;
            next;
        }

        # Blood type is stored without the trailing "型".
        $value =~ s/型// if $column eq 'blood' && defined $value;

        set_column($dbh, $id, $column, $value);
    }

    # The favorites are concatenated without a separator, which is what the
    # adjacent quoted literals of the original statement produced in MySQL.
    set_column($dbh, $id, 'favorite', join('', @favorites)) if @favorites;
}

$dbh->disconnect;
あ、favorite 以外の値は SQL 文に埋め込む前に quote してない。