01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
2009 : 01 02 03 04 05 06 07 08 09 10 11 12
2008 : 01 02 03 04 05 06 07 08 09 10 11 12
2007 : 01 02 03 04 05 06 07 08 09 10 11 12
2006 : 01 02 03 04 05 06 07 08 09 10 11 12
2005 : 01 02 03 04 05 06 07 08 09 10 11 12
2004 : 01 02 03 04 05 06 07 08 09 10 11 12
$ cd ./O2-tools-2.00
$ ./configure --prefix=/usr/local/ ; make ; sudo make install
$ cd ../nhocr-0.16
$ ./configure --prefix=/usr/local/ --with-O2tools=/usr/local ; make ; sudo make install
$ setenv NHOCR_DICDIR /usr/local/share
$ convert input.jpg input.pgm
$ nhocr -line -o output.txt test/hello.pgm
$ cd ../iulib-0.3
$ ./configure ; make ; sudo make install
$ cd ..
$ svn checkout http://tesseract-ocr.googlecode.com/svn/trunk/ tesseract-ocr
$ cd tesseract-ocr
$ ./configure ; make ; sudo make install
$ cd ../ocropus-0.3
$ ./configure --without-fst --without-leptonica; make ; sudo make install
$ cd ../nhocr-0.16/
$ vi ocropus/rec-nhocr-0.3.lua
(vi内で置換) /opt/nhocr -> /usr/local
$ sudo cp ocropus/rec-nhocr-0.3.lua /usr/local/share/ocropus/scripts/rec-nhocr.lua
$ setenv OCROSCRIPTS /usr/local/share/ocropus/scripts
// $ ocroscript recognize data/pages/alice_1.png
$ ocroscript rec-nhocr ~/Desktop/Screenshot.png
$ gs -dSAFER -dBATCH -dNOPAUSE -sDEVICE=jpeg -r300 -sOutputFile=kuwabara_01.%02d.pdf.jpg kuwabara_01.pdf
// ここまでは完璧。
// 後は、レイアウト。でも、MySQLに登録して、マッチングをかけるだけなら、もうできるはず。
$ cd ../weocr-toolkit-0.13
$ ./configure --prefix=/usr/local/ --with-O2tools=/usr/local ; make ; sudo make install
$ vi bin/execocr_tesseract.tmpl html/* cgi-bin/*
(vi内で置換) /opt/weocr -> /usr/local
(vi内で置換) /cgi-bin -> /weocr
$ sudo cp bin/execocr_tesseract.tmpl /usr/local/bin/execocr_tesseract
$ cd bin/; sudo cp filenorm run_xalan wo_cleandir /usr/local/bin ; cd -
$ mkdir /var/www/html/weocr/; cp html/* cgi-bin/* /var/www/html/weocr
$ sudo vi /etc/httpd/conf/httpd.conf
AddHandler cgi-script .cgi
Options FollowSymLinks ExecCGI
AllowOverride None
$ cd /var/www/html/weocr/; make ; cd -
(Location of error unknown)XSLT Error (javax.xml.transform.TransformerConfigurationException): getAssociatedStylesheets failed
make: *** [srvspec.html] Error 255
あれ?
// $ java -cp /usr/share/java/xalan-j2.jar org.apache.xalan.xslt.Process -HTML -IN srvspec.xml -OUT srvspec.html
// Javaが悪い。OSをupdateしたら直るかも。
// ということでpending
nhocrを使って、ocrと全文検索を組み合わせてみた
2009 : 01 02 03 04 05 06 07 08 09 10 11 12
2008 : 01 02 03 04 05 06 07 08 09 10 11 12
2007 : 01 02 03 04 05 06 07 08 09 10 11 12
2006 : 01 02 03 04 05 06 07 08 09 10 11 12
2005 : 01 02 03 04 05 06 07 08 09 10 11 12
2004 : 01 02 03 04 05 06 07 08 09 10 11 12
最終更新時間: 2009-05-28 23:58