# Fetch the serial index page into index.raw and re-encode it as UTF-8.
#
# Each command is on its own line: when they were run together on a single
# line, the iconv and mv words were handed to wget as extra arguments and
# never executed as commands.

# -c asks wget to continue a partially downloaded index.raw if one exists.
# Stop here on failure: the later steps have nothing valid to work on.
wget -c "http://lz.book.sohu.com/serialize-id-12171.html" -O index.raw || {
  echo "download of index page failed" >&2
  exit 1
}

# Convert from GBK to UTF-8 via a temporary file, because iconv cannot
# safely write to the file it is still reading from.
iconv -f GBK -t UTF-8 index.raw > index.raw.utf
mv -f index.raw.utf index.raw

# find lines containing chapter links
# NOTE(review): the sed command that follows is cut off in the visible source;
# its fragment is preserved verbatim here and must be restored from the
# original script before it can run: sed -n '/