#!/usr/bin/env bash
###############################################################################
# name:Robot
# date:2012-11-09
# desc:download porn pictures from baixingsex
#
# NOTE(review): three scripts (Robot.sh, Daemon.sh, Shutdown.sh) appear back to
# back in this file; each section below is presumably meant to live in its own
# file of that name - confirm before splitting.
###############################################################################

# Exit codes used by Robot.
readonly FIRSTUSE=64
readonly USER_CHOOSE_QUIT=65
readonly UNKNOWN_CHOICE=66
readonly BAD_ARGUMENTS=67

###############################################################################
# show usage FIXME:use literal instead of numeric
# Outputs: the usage text on stdout
###############################################################################
function Usage {
  echo -e "Usage: ./Robot.sh [-o outdir] [-t type] [-s startpage] [-e endpage]"
  echo -e " -t 42 zipai"
  echo -e " 43 siwa"
  echo -e " 44 toupai"
  echo -e " 45 yazhou"
  echo -e " 46 oumei"
  echo -e " 47 linglei"
  echo -e " 48 mingxing"
}

###############################################################################
# Download a URL into a file unless that file is already present.
# Arguments: $1 - URL to fetch
#            $2 - destination path
# Returns:   0 if the destination exists (cached or freshly downloaded),
#            1 if the download failed
###############################################################################
function FetchCached {
  local url=$1
  local dest=$2

  # A non-empty file counts as cached; an empty one is treated as the
  # leftover of an earlier failed download and is fetched again.
  [[ -s "$dest" ]] && return 0

  # Download to a side file first so that a failed transfer never leaves a
  # truncated file that would later be mistaken for a cache hit.
  # Same retry/wait options as before. stdin is detached so wget can never
  # consume the input of a surrounding "while read" loop.
  if wget -t 5 -w 30 -O "${dest}.part" -- "$url" > /dev/null 2>&1 < /dev/null; then
    mv -f -- "${dest}.part" "$dest"
  else
    rm -f -- "${dest}.part"
    return 1
  fi
}

###############################################################################
# Extract "url<TAB>title" pairs for every thread listed in a forum page.
# Arguments: $1 - path of the downloaded forum page
# Outputs:   one "url<TAB>title" line per thread on stdout
###############################################################################
function ExtractThreads {
  # Inside each <tbody id="normalthread...> section, every line containing
  # "atarget" is taken as a thread link: the first double-quoted string is
  # the url and the text between the first ">" and the following "<" is the
  # title. getline results are checked so a page without a closing </tbody>
  # ends the scan instead of looping forever.
  awk '
    /<tbody id="normalthread/ {
      if ((getline line) <= 0) exit
      while (line !~ /<\/tbody>/) {
        if ((getline line) <= 0) exit
        if (line ~ /atarget/) {
          split(line, a, "\"")
          rst = substr(line, index(line, ">") + 1)
          to = index(rst, "<")
          # awk strings are 1-indexed: starting at 0 lost the last character
          printf("%s\t%s\n", a[2], substr(rst, 1, to - 1))
        }
      }
      next
    }
  ' "$1"
}

###############################################################################
# loop for each img url in each page and download it
# Arguments: $1 - output directory (created if missing)
#            $2 - forum type number
#            $3 - first page
#            $4 - last page (inclusive)
# Outputs:   progress lines on stdout, download failures on stderr
# Returns:   1 if the output directory cannot be prepared
###############################################################################
function RobotImpl {
  local dir=$1
  local type=$2
  local s=$3
  local e=$(($4 + 1))
  # cache wget files
  local cache="cache"
  local base_url="http://news.baisex.me/"
  local page page_name page_url title_list
  local thread_url title_name title_dir thread_cache
  local pic_url pic_idx ext

  # change working-directory
  if ! mkdir -p -- "$dir" || ! cd -- "$dir" || ! mkdir -p -- "$cache"; then
    printf 'cannot prepare output directory %s\n' "$dir" >&2
    return 1
  fi

  for ((page = s; page < e; page++)); do
    page_name="${type}-${page}.html"
    page_url="${base_url}forum-${type}-${page}.html"
    title_list="${cache}/${type}-${page}.title"
    printf '%s\t%s\n' "$page_url" "$page_name"

    # check first if file in cache
    if ! FetchCached "$page_url" "${cache}/${page_name}"; then
      printf 'failed to download %s\n' "$page_url" >&2
      continue
    fi

    # extract each thread's title and url
    ExtractThreads "${cache}/${page_name}" > "$title_list"

    # Split on the tab only, so titles containing spaces stay intact.
    while IFS=$'\t' read -r thread_url title_name; do
      printf '%s\t%s\n' "$thread_url" "$title_name"
      [[ -n "$thread_url" && -n "$title_name" ]] || continue

      # A "/" in a title or url would point into a non-existent directory.
      title_dir=${title_name//\//_}
      thread_cache="${cache}/${thread_url//\//_}"
      mkdir -p -- "$title_dir" || continue

      # download each thread content without img
      # check if in cache
      if ! FetchCached "${base_url}${thread_url}" "$thread_cache"; then
        printf 'failed to download %s\n' "${base_url}${thread_url}" >&2
        continue
      fi

      # extract each pic's url from threadUrl: on every line containing
      # "onload" the first double-quoted string is taken as the picture url
      pic_idx=1
      while IFS= read -r pic_url; do
        if [[ -n "$pic_url" ]]; then
          # text after the last "." of the url is used as file extension
          ext=${pic_url##*.}
          FetchCached "$pic_url" "${title_dir}/${pic_idx}.${ext}" \
            || printf 'failed to download %s\n' "$pic_url" >&2
        fi
        # The index advances even for skipped entries so that file numbers
        # keep matching the position of the picture in the thread.
        pic_idx=$((pic_idx + 1))
      done < <(awk '/onload/ { split($0, a, "\""); print a[2] }' "$thread_cache")
    done < "$title_list"

    # remove tmp file here
    rm -f -- "$title_list"
  done
}

###############################################################################
# check argument and call RobotImpl
# Arguments: the script's command line
#            ([-o outdir] [-t type] [-s startpage] [-e endpage])
# Outputs:   usage / confirmation prompt on stdout; writes Robot.arg
# Exits:     FIRSTUSE without arguments, BAD_ARGUMENTS on invalid values,
#            USER_CHOOSE_QUIT / UNKNOWN_CHOICE depending on the prompt answer
###############################################################################
function Robot {
  # set arguments default value
  local dir=/mnt/windows/baixingsex
  local type=42
  local start=1
  local end=10
  local saved_args="$*"
  local -a table=(zipai siwa toupai yazhou oumei linglei mingxing)
  local value y_or_n

  if (($# == 0)); then
    Usage
    exit "$FIRSTUSE"
  fi

  # parse command-line arguments
  while (($# > 0)); do
    case "$1" in
      -o | -t | -s | -e)
        # every option needs a value
        if (($# < 2)); then
          Usage
          exit "$BAD_ARGUMENTS"
        fi
        case "$1" in
          -o) dir=$2 ;;   # outdir
          -t) type=$2 ;;  # type
          -s) start=$2 ;; # start page
          -e) end=$2 ;;   # end page
        esac
        shift 2
        ;;
      *)
        # unknown arguments are ignored
        shift
        ;;
    esac
  done

  # check arguments: the numeric ones must be plain decimal numbers
  for value in "$type" "$start" "$end"; do
    if [[ ! "$value" =~ ^[0-9]+$ ]]; then
      Usage
      exit "$BAD_ARGUMENTS"
    fi
  done
  # force base 10 so a leading zero is not read as octal
  type=$((10#$type))
  start=$((10#$start))
  end=$((10#$end))
  if [[ -z "$dir" ]] || ((type < 42 || type > 48 || start > end)); then
    Usage
    exit "$BAD_ARGUMENTS"
  fi

  # save arg to Robot.arg
  # in case of crash we could restart it
  # printf is used because echo would swallow a leading "-e" (the end page
  # flag) as one of its own options. Saving only happens once the arguments
  # are known to be valid, so a bad invocation cannot clobber the file.
  printf '%s\n' "$saved_args" > Robot.arg

  printf 'dir=%s type=%s start=%s end=%s\n' \
    "$dir" "${table[type - 42]}" "$start" "$end"
  printf 'Are you sure?[y/n]'
  read -r y_or_n
  case "$y_or_n" in
    y | yes)
      RobotImpl "$dir" "$type" "$start" "$end"
      ;;
    n | no)
      exit "$USER_CHOOSE_QUIT"
      ;;
    *)
      exit "$UNKNOWN_CHOICE"
      ;;
  esac
}

###############################################################################
# main entry
###############################################################################
Robot "$@"

#!/usr/bin/env bash
###############################################################################
# name:Daemon.sh
# date:2012-11-09
# desc:run in background and monitor Robot process restart it when dead
###############################################################################
ROBOT=Robot.sh
LOG=Robot.log
ARG=Robot.arg

###############################################################################
# Check every 30 seconds whether a Robot process exists and restart it with
# the arguments saved in Robot.arg when it does not. Never returns.
# Globals: ROBOT, LOG, ARG (read)
# Outputs: appends "[dead]", "[restart]" and "[alive]" lines to the log file
###############################################################################
function Daemon {
  local now
  local -a robot_args

  while true; do
    now=$(date "+%Y-%m-%d %H:%M:%S")
    # pgrep matches against the full command line and copes with several
    # matching processes at once.
    if pgrep -f -- "$ROBOT" > /dev/null; then
      # too verbose ?
      printf '%s [alive]\n' "$now" >> "$LOG"
    else
      # Robot is dead log time first
      printf '%s [dead]\n' "$now" >> "$LOG"

      # reload arg from Robot.arg restart it
      robot_args=()
      [[ -r "$ARG" ]] && read -r -a robot_args < "$ARG"

      # Robot asks "Are you sure?[y/n]" before it starts; answer it on
      # stdin, otherwise the restarted process would leave immediately.
      nohup ./Robot.sh "${robot_args[@]}" <<< "y" &

      now=$(date "+%Y-%m-%d %H:%M:%S")
      printf '%s [restart]\n' "$now" >> "$LOG"
    fi
    # relax CPU
    sleep 30
  done
}

###############################################################################
# main entry
###############################################################################
Daemon

#!/usr/bin/env bash
###############################################################################
# name:Shutdown.sh
# date:2012-11-10
# desc:shutdown Robot.sh and Daemon.sh
###############################################################################
ROBOT=Robot.sh
DAEMON=Daemon.sh

###############################################################################
# Terminate the Daemon and Robot processes and report the result.
# Globals: ROBOT, DAEMON (read)
# Outputs: status messages on stdout
# Exits:   always with status 0
###############################################################################
function shutdown {
  local waittime=2
  # NOTE(review): nothing in the code visible here writes Robot.download;
  # the count stays 0 when the file is absent - confirm where it comes from.
  local download_log=/mnt/windows/baixingsex/Robot.download
  local titlenum=0

  echo -e "shutdown..."

  # Stop the daemon first so that it cannot restart Robot in between.
  pkill -f -- "$DAEMON"
  pkill -f -- "$ROBOT"
  sleep "$waittime"

  if ! pgrep -f -- "$DAEMON" > /dev/null && ! pgrep -f -- "$ROBOT" > /dev/null; then
    if [[ -r "$download_log" ]]; then
      titlenum=$(awk -F'\t' '{print $2}' "$download_log" | sort -u | wc -l)
    fi
    echo -e "Robot has been shutdown"
    echo -e "total $titlenum threads downloaded"
  else
    echo -e "Failed to shutdown Robot"
  fi
  exit 0
}

###############################################################################
# main entry
###############################################################################
shutdown