Nutch 命令

http://blog.csdn.net/east271536394/article/details/8962720

/usr/local/hbase/bin/stop-hbase.sh

/usr/local/hbase/bin/start-hbase.sh

 bin/nutch inject urls

bin/nutch generate -topN 20
bin/nutch fetch -all
bin/nutch parse -all
bin/nutch updatedb

最新爬取内容 命令

bin/nutch inject urls

bin/nutch generate -topN 2000

bin/nutch fetch -all

bin/nutch parse -all

bin/nutch updatedb

 bin/nutch  solrindex http://127.0.0.1:8080/solr/  -reindex

2.

./bin/nutch inject urls

bin/nutch crawl urls -dir urls -depth 20 -topN 500000 -threads 4 &> crawl.log &

bin/nutch crawl urls -dir urls -depth 6 -topN 2000 -threads 2 &> crawl.log &

bin/nutch crawl urls -dir urls -depth 6 -topN 1000 -threads 1 > crawl.log &

bin/nutch crawl urls -dir crawl.demo -depth 2 -threads 4 >& crawl.log

bin/nutch crawl urls -dir urls  -depth 2 -threads 4 >& crawl.log

 bin/nutch  solrindex http://127.0.0.1:8080/solr/  -reindex

./bin/nutch solrindex http://127.0.0.1:8080/solr/ -reindex