Lucene 소스를 다운받아 보면 소스와 라이브러리, Jar 파일만 들어 있기
때문에 처음 접하는 사람의 경우 어떻게 시작해야 할지 당황하기 쉽다.
lucene의 자매품 같은 프로젝트로 nutch가 있는데
nutch에는 bin 폴더 밑에 "nutch"라는 실행 스크립트가 들어있다.
이를 수정해서 lucene용 스크립트로 사용하면 많은 수정 없이
lucene에 맞게 사용할 수 있다.
다음은 nutch 스크립트를 수정해서 만든 lucene 스크립트이다.
스크립트를 사용하려면 lucene 폴더 아래에 bin 폴더를 만들고
스크립트를 lucene이라는 이름으로 저장한 뒤 실행하면 된다.
우선 실행하는 방법을 보자.
bin/lucene IndexFiles [Option]과 같이 간단하게 실행할 수 있다.
보면 알겠지만 스크립트에는 contrib와 기타 jar에 포함되어 있는,
main 메서드를 가진 모든 클래스를 명령으로 등록해 두었다.
스크립트의 소스는 다음과 같다.
#!/bin/bash
#
# lucene - launcher for the runnable (main-bearing) classes shipped with
# Lucene core, the demos and the contrib jars.
#
# Usage: lucene COMMAND [args...]
#        lucene CLASSNAME [args...]
#
# When invoked without arguments, print the list of known commands and exit
# with status 1. Every name listed here has a matching entry in the class
# dispatch further down; anything else is treated as a class name.
if [ "$#" -eq 0 ]; then
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'USAGE'
Usage: lucene COMMAND
where COMMAND is one of:
DeleteFiles
IndexFiles
IndexHTML
IndexTrec
SearchFiles
HTMLParseTest
PorterStemmer
CheckIndex
IndexReader
QueryParser
English
--------------- contrib ----------------
PatternParser
TernaryTree
Benchmark
precisionrecall
EvaluationTrec
programSample
QueryDriver
QualityQueriesFinder
ExtractReuters
ExtractWikipedia
SanityLoadLibrary
FieldTermStack
Lucli
FieldNormModifier
IndexSplitter
MultiPassIndexSplitter
HighFreqTerms
IndexMergeTool
PrecedenceQueryParser
MoreLikeThis
RemoteSearchable
TestRemoteSort
SnowballTestApp
GeoHashUtils
ListSearcherSimulater
SynExpand
SynLookup
Syns2Index
or
 CLASSNAME run the class named CLASSNAME
USAGE
  exit 1
fi
# get arguments: the first word selects the command (or class) to run,
# everything after it is handed to the Java program untouched.
COMMAND=$1
shift

# some directories
# Locate this script so that LUCENE_HOME can default to its parent directory
# (the script is meant to live in <lucene>/bin). Symlinks are not resolved.
THIS="${BASH_SOURCE[0]:-$0}"
THIS_DIR=$(cd "$(dirname "$THIS")" && pwd)
# An LUCENE_HOME taken from the environment always wins; only fall back to
# <script dir>/.. when it is unset or empty, instead of building a classpath
# rooted at "/".
if [ -z "$LUCENE_HOME" ]; then
  LUCENE_HOME=$(cd "$THIS_DIR/.." && pwd)
fi

# some Java parameters
# LUCENE_JAVA_HOME, when set, overrides JAVA_HOME for this launcher only.
if [ -n "$LUCENE_JAVA_HOME" ]; then
  #echo "run java in $LUCENE_JAVA_HOME"
  JAVA_HOME=$LUCENE_JAVA_HOME
fi
if [ -z "$JAVA_HOME" ]; then
  # Diagnostics go to stderr so they never mix with program output.
  echo "Error: JAVA_HOME is not set." >&2
  exit 1
fi
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m

# check envvars which might override default args
# LUCENE_HEAPSIZE is a number of megabytes, e.g. LUCENE_HEAPSIZE=2000.
if [ -n "$LUCENE_HEAPSIZE" ]; then
  #echo "run with heapsize $LUCENE_HEAPSIZE"
  JAVA_HEAP_MAX="-Xmx${LUCENE_HEAPSIZE}m"
  #echo $JAVA_HEAP_MAX
fi

# Core and demo jars produced by the Lucene build, plus the JDK tools jar.
CLASSPATH=$LUCENE_HOME/build/lucene-core-3.0.1-dev.jar:$LUCENE_HOME/build/lucene-demos-3.0.1-dev.jar
CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
#CLASSPATH=${CLASSPATH}:$LUCENE_HOME/build/classes

# add contrib to classpath
#for f in "$LUCENE_HOME"/build/lib-contrib/*.jar; do
#  CLASSPATH=${CLASSPATH}:$f
#done

# add libs to CLASSPATH
for f in "$LUCENE_HOME"/lib/*.jar; do
  # When nothing matches, the glob stays literal; skip it rather than
  # appending a bogus ".../lib/*.jar" entry.
  [ -e "$f" ] || continue
  CLASSPATH=${CLASSPATH}:$f
done

# Forward a native library path to the JVM when one is provided.
if [ -n "$JAVA_LIBRARY_PATH" ]; then
  LUCENE_OPTS="$LUCENE_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
fi
# figure out which class to run

#######################################
# Map a launcher command name to the fully qualified Java class to run.
# Names that are not in the table are returned unchanged, so a class name
# can be passed directly as the command.
# Arguments: $1 - command name or class name
# Outputs:   the class name on stdout
#######################################
lucene_resolve_class() {
  local class
  case "$1" in
    # --- core / demo ---
    DeleteFiles)            class=org.apache.lucene.demo.DeleteFiles ;;
    IndexFiles)             class=org.apache.lucene.demo.IndexFiles ;;
    IndexHTML)              class=org.apache.lucene.demo.IndexHTML ;;
    # The kr.ac.kaist.* classes are not part of the Lucene distribution;
    # presumably local additions that must be on the classpath.
    IndexTrec)              class=kr.ac.kaist.demo.IndexTrec ;;
    #IndexTrec)             class=org.apache.lucene.demo.IndexTrec ;;
    SearchFiles)            class=kr.ac.kaist.demo.SearchFiles ;;
    HTMLParseTest)          class=org.apache.lucene.demo.html.Test ;;
    PorterStemmer)          class=org.apache.lucene.analysis.PorterStemmer ;;
    CheckIndex)             class=org.apache.lucene.index.CheckIndex ;;
    # IndexReader lives in the "index" package.
    IndexReader)            class=org.apache.lucene.index.IndexReader ;;
    QueryParser)            class=org.apache.lucene.queryParser.QueryParser ;;
    English)                class=org.apache.lucene.util.English ;;
    # --- contrib ---
    PatternParser)          class=org.apache.lucene.analysis.compound.hyphenation.PatternParser ;;
    TernaryTree)            class=org.apache.lucene.analysis.compound.hyphenation.TernaryTree ;;
    Benchmark)              class=org.apache.lucene.benchmark.byTask.Benchmark ;;
    precisionrecall)        class=kr.ac.kaist.demo.PrecisionRecall ;;
    EvaluationTrec)         class=kr.ac.kaist.demo.EvaluationTrec ;;
    programSample)          class=org.apache.lucene.benchmark.byTask.programmatic.Sample ;;
    QueryDriver)            class=org.apache.lucene.benchmark.quality.trec.QueryDriver ;;
    QualityQueriesFinder)   class=org.apache.lucene.benchmark.quality.utils.QualityQueriesFinder ;;
    # Previously pointed at ExtractWikipedia by mistake.
    ExtractReuters)         class=org.apache.lucene.benchmark.utils.ExtractReuters ;;
    ExtractWikipedia)       class=org.apache.lucene.benchmark.utils.ExtractWikipedia ;;
    SanityLoadLibrary)      class=org.apache.lucene.store.db.SanityLoadLibrary ;;
    FieldTermStack)         class=org.apache.lucene.search.vectorhighlight.FieldTermStack ;;
    Lucli)                  class=lucli.Lucli ;;
    FieldNormModifier)      class=org.apache.lucene.index.FieldNormModifier ;;
    IndexSplitter)          class=org.apache.lucene.index.IndexSplitter ;;
    MultiPassIndexSplitter) class=org.apache.lucene.index.MultiPassIndexSplitter ;;
    HighFreqTerms)          class=org.apache.lucene.misc.HighFreqTerms ;;
    IndexMergeTool)         class=org.apache.lucene.misc.IndexMergeTool ;;
    # Previously a truncated package name ("org.apache.lucene.").
    PrecedenceQueryParser)  class=org.apache.lucene.queryParser.precedence.PrecedenceQueryParser ;;
    MoreLikeThis)           class=org.apache.lucene.search.similar.MoreLikeThis ;;
    RemoteSearchable)       class=org.apache.lucene.search.RemoteSearchable ;;
    TestRemoteSort)         class=org.apache.lucene.search.TestRemoteSort ;;
    SnowballTestApp)        class=org.tartarus.snowball.TestApp ;;
    GeoHashUtils)           class=org.apache.lucene.spatial.geohash.GeoHashUtils ;;
    # The historical misspelling is kept; the correct spelling is accepted too.
    ListSearcherSimulater | ListSearcherSimulator)
                            class=org.apache.lucene.swing.models.ListSearcherSimulator ;;
    SynExpand)              class=org.apache.lucene.wordnet.SynExpand ;;
    SynLookup)              class=org.apache.lucene.wordnet.SynLookup ;;
    Syns2Index)             class=org.apache.lucene.wordnet.Syns2Index ;;
    # Anything else is taken to be a class name already.
    *)                      class=$1 ;;
  esac
  printf '%s\n' "$class"
}

CLASS=$(lucene_resolve_class "$COMMAND")
# Hand control to the JVM; exec replaces this shell, so nothing runs after it.
# JAVA_HEAP_MAX, LUCENE_OPTS and CLASS are expanded unquoted on purpose so that
# each whitespace-separated option becomes its own argument, while the java
# path, the classpath and the user's arguments are passed through verbatim.
java_cmd=("$JAVA" $JAVA_HEAP_MAX $LUCENE_OPTS -cp "$CLASSPATH" $CLASS "$@")
exec "${java_cmd[@]}"
댓글 없음:
댓글 쓰기