あっという間の十年ブログ【Ruby篇】

"詮ずるところ#{プログラミング}は、ただ年月長く倦まずおこたらずして、はげみつとむるぞ肝要にて、学びやうは、いかやうにてもよかるべく、さのみかかはるまじきことなり。いかほど学びかたよくても、おこたりてつとめざれば、功はなし。また、人々の才と不才とによりて、その功いたく異なれども、才不才は、生まれつきたることなれば、力に及びがたし。されど、大抵は、不才なる人といへども、おこたらずつとめだにすれば、それだけの功は有る物なり。" - 本居宣長

mixi

ユーザのプロフィール情報をデータベースに格納するだけです。普通に文字化けとかします。

mixi_table.sql

-- One row per mixi profile id.  The crawler first inserts the bare id and
-- then fills the remaining columns one UPDATE at a time, so every column
-- except the key is nullable.
CREATE TABLE mixi (
    id         INT          NOT NULL PRIMARY KEY,  -- numeric id from show_friend.pl?id=N
    hn         VARCHAR(21),                        -- nickname
    image      VARCHAR(255),                       -- src attribute of the profile photo
    name       VARCHAR(42),                        -- "名前" profile row
    sex        ENUM('男性', '女性'),               -- "性別" profile row
    address    VARCHAR(20),                        -- "現住所" profile row
    age        VARCHAR(3),                         -- "年齢" profile row, stored as text
    birth      VARCHAR(8),                         -- "誕生日" profile row, stored as text
    blood      VARCHAR(4),                         -- "血液型" profile row
    home       VARCHAR(20),                        -- "出身地" profile row
    hobby      VARCHAR(255),                       -- "趣味" profile row
    job        VARCHAR(15),                        -- "職業" profile row
    department VARCHAR(200),                       -- "所属" profile row
    introduce  TEXT,                               -- "自己紹介" profile row
    favorite   VARCHAR(255)                        -- concatenated "好きな…" profile rows
);

mixi.pl

use Web::Scraper;
use WWW::Mechanize;
use URI;
use DBI;
use utf8;
use Encode;

use strict;
use warnings;

# Crawl mixi profile pages by numeric id and store each profile in the
# `mixi` table.
#
# For every id the script inserts a row holding only the id, then fills in
# the nickname, the profile image URL and each recognised profile-table
# row with individual UPDATE statements.  All scraped text is passed to
# the database through bind parameters, never interpolated into SQL.
# Fields that are absent from a page are left NULL.

# Highest profile id to visit.  The original bound was written "6千万"
# (sixty million), which is not a valid Perl numeric literal.
my $LAST_ID = 60_000_000;

# At most this many profile-table headers are examined per page.
my $MAX_CATEGORIES = 13;

# Profile-table header patterns and the column each one feeds.  The list is
# ordered: the first pattern that matches a header wins.
my @COLUMN_RULES = (
    [ qr/名前/     => 'name' ],
    [ qr/性別/     => 'sex' ],
    [ qr/現住所/   => 'address' ],
    [ qr/年齢/     => 'age' ],
    [ qr/誕生日/   => 'birth' ],
    [ qr/血液型/   => 'blood' ],
    [ qr/出身地/   => 'home' ],
    [ qr/趣味/     => 'hobby' ],
    [ qr/職業/     => 'job' ],
    [ qr/所属/     => 'department' ],
    [ qr/自己紹介/ => 'introduce' ],
);

# NOTE(review): the accompanying post reports garbled characters in the
# stored data; a connection-level character-set option may be required --
# confirm against the table's character set before adding one.
my $dbh = DBI->connect('DBI:mysql:DATABASENAME','USER','PASSWD')
    or die "cannot connect to database: $DBI::errstr";

binmode STDIN,":encoding(utf8)";
binmode STDOUT,":encoding(utf8)";

# Prepare every statement once instead of once per row.  Column names come
# only from the fixed list below, so interpolating them is safe; all values
# are bound at execute time.
my $insert_id = $dbh->prepare('insert into mixi(id) values (?)');
my %update_for;
for my $column (qw(hn image favorite), map { $_->[1] } @COLUMN_RULES) {
    $update_for{$column}
        = $dbh->prepare("update mixi set $column = ? where id = ?");
}

# Write one scraped value into one column of the row for $id.
# Undefined values are skipped so the column keeps its NULL.
my $store = sub {
    my ($column, $id, $value) = @_;
    return unless defined $value;
    $update_for{$column}->execute($value, $id);
    return;
};

my $mech = WWW::Mechanize->new;
$mech->get('http://mixi.jp/');

# Each rule collects every match on the page into an array.
my $scraper = scraper {
    process '.name a', 'hn[]' => 'TEXT';
    process '.photo img', 'images[]' => '@src';
    process '.profileListTable th', 'category[]'   => 'TEXT';
    process '.profileListTable td', 'text[]' => 'TEXT';
};

# Log in so that the profile pages can be fetched.
$mech->submit_form(
    fields => {
        email    => 'MAIL',
        password => 'PASSWD',
    },
);

for my $id (1 .. $LAST_ID) {
    $mech->get("http://mixi.jp/show_friend.pl?id=$id");
    my $res = $scraper->scrape($mech->content);

    # Create the row first; the columns are filled in afterwards.
    $insert_id->execute($id);

    # Nickname and profile image: the first match of each on the page.
    my @nicknames = @{ $res->{hn}     || [] };
    my @images    = @{ $res->{images} || [] };
    $store->('hn',    $id, $nicknames[0]);
    $store->('image', $id, $images[0]);

    my @categories = @{ $res->{category} || [] };
    my @texts      = @{ $res->{text}     || [] };
    my @favorites;
    my $favorite_count = 0;

    for my $slot (1 .. $MAX_CATEGORIES) {
        my $category = shift @categories;

        # Stop on a deleted id or once every profile row has been read.
        last unless $category;

        my $column;
        for my $rule (@COLUMN_RULES) {
            my ($pattern, $name) = @{$rule};
            if ($category =~ $pattern) {
                $column = $name;
                last;
            }
        }

        # NOTE(review): a value is consumed only for recognised headers, as
        # in the original.  If the page pairs every th with a td, an
        # unrecognised header shifts all later values by one -- confirm
        # against the page markup.
        if (defined $column) {
            my $value = shift @texts;

            # NOTE(review): the original had an empty pattern here (s///),
            # which Perl resolves to the last successful match, /血液型/.
            # That is spelled out below; the intended pattern may have been
            # lost and should be confirmed.
            $value =~ s/血液型// if $column eq 'blood' && defined $value;

            $store->($column, $id, $value);
        }
        elsif ($category =~ /好きな/) {
            # Only the first three "favourite ..." rows are collected.
            $favorite_count++;
            push @favorites, shift @texts if $favorite_count <= 3;
        }
    }

    # Store the collected favourites even when the page listed more than
    # three; the original wrote nothing at all in that case.  Values are
    # joined without a separator, matching what MySQL did with the adjacent
    # string literals the original generated.
    my @kept = grep { defined } @favorites;
    $store->('favorite', $id, join '', @kept) if @kept;
}
$dbh->disconnect;

あ、quoteしてない。