2011년 7월 16일 토요일

lucene의 실행 script

 Lucene 소스를 다운받아 보면 소스와 라이브러리, Jar 파일만 들어있기

 때문에 처음 접하는 사람의 경우 어떻게 시작해야 할지 당황하기 쉽다.

 lucene의 자매품 같은 프로젝트로 nutch가 있는데

 nutch에는 bin 폴더 밑에 "nutch"라는 실행 스크립트가 들어있다.

 이를 수정해서 lucene용 스크립트로 사용하면 많은 수정 없이

 lucene에 맞게 사용할 수 있다.

 다음은 nutch 스크립트를 수정해서 만든 lucene 스크립트이다.

 스크립트를 사용하려면 lucene 폴더 아래에 bin 폴더를 만들고

 스크립트를 lucene이라는 이름으로 저장하고 실행하면 된다.

 우선 실행하는 방법을 보자.












 bin/lucene IndexFiles [Option]과 같이 간단하게 실행할 수 있다.

 보면 알겠지만 스크립트에는 contrib와 기타 jar에 포함되어 있는

 클래스 중 main 메서드를 가진 클래스를 모두 등록해 두었다.

스크립트의 소스는 다음과 같다.

 #!/bin/bash

# Print the list of supported command aliases and exit with status 1 when the
# script is invoked without any argument.
# "English" is listed because the class dispatch of this script maps it to
# org.apache.lucene.util.English.
# NOTE(review): "IndexPPT" is advertised here but the class dispatch has no
# alias for it, so it is run as a literal class name - confirm the intended
# fully-qualified class.
if [ $# -eq 0 ]; then
  cat <<'EOF'
Usage: lucene COMMAND
where COMMAND is one of:
DeleteFiles
IndexFiles
IndexHTML
IndexTrec
IndexPPT
SearchFiles
HTMLParseTest
PorterStemmer
CheckIndex
IndexReader
QueryParser
English
--------------- contrib ----------------
PatternParser
TernaryTree
Benchmark
precisionrecall
EvaluationTrec
programSample
QueryDriver
QualityQueriesFinder
ExtractReuters
ExtractWikipedia
SanityLoadLibrary
FieldTermStack
Lucli
FieldNormModifier
IndexSplitter
MultiPassIndexSplitter
HighFreqTerms
IndexMergeTool
PrecedenceQueryParser
MoreLikeThis
RemoteSearchable
TestRemoteSort
SnowballTestApp
GeoHashUtils
ListSearcherSimulater
SynExpand
SynLookup
Syns2Index
or
 CLASSNAME                  run the class named CLASSNAME
EOF
  exit 1
fi

# get arguments: the first argument selects the command (an alias or a class
# name); everything after it is forwarded to the Java program.
COMMAND=$1
shift

# Print the Lucene installation directory implied by the location of the
# script at path $1, i.e. the parent of the directory that contains it.
# Arguments: $1 - path of this script (normally "$0")
# Outputs:   the directory path on stdout
# Returns:   non-zero if that directory cannot be entered
lucene_default_home() {
  local script_dir
  script_dir=$(dirname -- "$1") || return
  # CDPATH is cleared so that cd neither searches elsewhere nor prints a path.
  (CDPATH= cd -- "$script_dir/.." && pwd)
}

# some directories
THIS="$0"
THIS_DIR=$(dirname -- "$THIS")
# A LUCENE_HOME supplied by the environment wins; otherwise fall back to the
# directory above the one holding this script (the script lives in bin/).
if [ -z "$LUCENE_HOME" ]; then
  LUCENE_HOME=$(lucene_default_home "$THIS")
fi

# Java runtime selection.
# LUCENE_JAVA_HOME, when non-empty, takes precedence over JAVA_HOME.
if [ -n "$LUCENE_JAVA_HOME" ]; then
  JAVA_HOME="${LUCENE_JAVA_HOME}"
fi

# Without a JAVA_HOME there is no java binary to launch, so give up here.
if [ -z "$JAVA_HOME" ]; then
  echo "Error: JAVA_HOME is not set."
  exit 1
fi

JAVA="${JAVA_HOME}/bin/java"

# Maximum heap in megabytes: LUCENE_HEAPSIZE when non-empty, otherwise 1000.
JAVA_HEAP_MAX="-Xmx${LUCENE_HEAPSIZE:-1000}m"

# Append every *.jar file found directly inside directory $1 to CLASSPATH.
# Globals:   CLASSPATH (appended to)
# Arguments: $1 - directory to scan (may contain spaces)
add_jars_to_classpath() {
  local jar
  for jar in "$1"/*.jar; do
    # An unmatched glob stays literal; skip it rather than adding a bogus
    # "<dir>/*.jar" entry to the classpath.
    [ -e "$jar" ] || continue
    CLASSPATH="${CLASSPATH}:${jar}"
  done
}

# Core and demo jars produced by the Lucene build, plus the JDK tools jar.
CLASSPATH="$LUCENE_HOME/build/lucene-core-3.0.1-dev.jar:$LUCENE_HOME/build/lucene-demos-3.0.1-dev.jar"
CLASSPATH="${CLASSPATH}:$JAVA_HOME/lib/tools.jar"
#CLASSPATH=${CLASSPATH}:$LUCENE_HOME/build/classes

# add contrib to classpath
#add_jars_to_classpath "$LUCENE_HOME/build/lib-contrib"

# add libs to CLASSPATH
add_jars_to_classpath "$LUCENE_HOME/lib"

# Forward a native library search path to the JVM when one is provided.
if [ -n "$JAVA_LIBRARY_PATH" ]; then
  LUCENE_OPTS="$LUCENE_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
fi

# figure out which class to run

# Map a command alias to the fully-qualified Java class it stands for.
# Arguments: $1 - command alias or class name
# Outputs:   the class name on stdout; a name that is not a known alias is
#            printed unchanged so that any CLASSNAME can be run directly
resolve_class() {
  local class
  case "$1" in
    DeleteFiles)            class=org.apache.lucene.demo.DeleteFiles ;;
    IndexFiles)             class=org.apache.lucene.demo.IndexFiles ;;
    IndexHTML)              class=org.apache.lucene.demo.IndexHTML ;;
    # alternative kept from the original: org.apache.lucene.demo.IndexTrec
    IndexTrec)              class=kr.ac.kaist.demo.IndexTrec ;;
    SearchFiles)            class=kr.ac.kaist.demo.SearchFiles ;;
    HTMLParseTest)          class=org.apache.lucene.demo.html.Test ;;
    PorterStemmer)          class=org.apache.lucene.analysis.PorterStemmer ;;
    CheckIndex)             class=org.apache.lucene.index.CheckIndex ;;
    # was org.apache.lucene.IndexReader, which lacks the "index" package
    IndexReader)            class=org.apache.lucene.index.IndexReader ;;
    QueryParser)            class=org.apache.lucene.queryParser.QueryParser ;;
    English)                class=org.apache.lucene.util.English ;;
    PatternParser)          class=org.apache.lucene.analysis.compound.hyphenation.PatternParser ;;
    TernaryTree)            class=org.apache.lucene.analysis.compound.hyphenation.TernaryTree ;;
    Benchmark)              class=org.apache.lucene.benchmark.byTask.Benchmark ;;
    precisionrecall)        class=kr.ac.kaist.demo.PrecisionRecall ;;
    EvaluationTrec)         class=kr.ac.kaist.demo.EvaluationTrec ;;
    programSample)          class=org.apache.lucene.benchmark.byTask.programmatic.Sample ;;
    QueryDriver)            class=org.apache.lucene.benchmark.quality.trec.QueryDriver ;;
    QualityQueriesFinder)   class=org.apache.lucene.benchmark.quality.utils.QualityQueriesFinder ;;
    # was mapped to ExtractWikipedia by a copy-paste slip
    ExtractReuters)         class=org.apache.lucene.benchmark.utils.ExtractReuters ;;
    ExtractWikipedia)       class=org.apache.lucene.benchmark.utils.ExtractWikipedia ;;
    SanityLoadLibrary)      class=org.apache.lucene.store.db.SanityLoadLibrary ;;
    FieldTermStack)         class=org.apache.lucene.search.vectorhighlight.FieldTermStack ;;
    Lucli)                  class=lucli.Lucli ;;
    FieldNormModifier)      class=org.apache.lucene.index.FieldNormModifier ;;
    IndexSplitter)          class=org.apache.lucene.index.IndexSplitter ;;
    MultiPassIndexSplitter) class=org.apache.lucene.index.MultiPassIndexSplitter ;;
    HighFreqTerms)          class=org.apache.lucene.misc.HighFreqTerms ;;
    IndexMergeTool)         class=org.apache.lucene.misc.IndexMergeTool ;;
    # was the truncated name "org.apache.lucene."
    PrecedenceQueryParser)  class=org.apache.lucene.queryParser.precedence.PrecedenceQueryParser ;;
    MoreLikeThis)           class=org.apache.lucene.search.similar.MoreLikeThis ;;
    RemoteSearchable)       class=org.apache.lucene.search.RemoteSearchable ;;
    TestRemoteSort)         class=org.apache.lucene.search.TestRemoteSort ;;
    SnowballTestApp)        class=org.tartarus.snowball.TestApp ;;
    GeoHashUtils)           class=org.apache.lucene.spatial.geohash.GeoHashUtils ;;
    # the misspelled alias is kept for compatibility; the correct spelling
    # is accepted as well
    ListSearcherSimulater | ListSearcherSimulator)
                            class=org.apache.lucene.swing.models.ListSearcherSimulator ;;
    SynExpand)              class=org.apache.lucene.wordnet.SynExpand ;;
    SynLookup)              class=org.apache.lucene.wordnet.SynLookup ;;
    Syns2Index)             class=org.apache.lucene.wordnet.Syns2Index ;;
    *)                      class=$1 ;;
  esac
  printf '%s\n' "$class"
}

CLASS=$(resolve_class "$COMMAND")

# Replace this shell with the JVM, forwarding the remaining arguments as-is.
# CLASS and JAVA_HEAP_MAX are quoted so that a user-supplied class name or
# heap size is never word-split or glob-expanded. LUCENE_OPTS stays unquoted
# on purpose: it carries space-separated JVM options that must split.
# shellcheck disable=SC2086
exec "$JAVA" "$JAVA_HEAP_MAX" $LUCENE_OPTS -cp "$CLASSPATH" "$CLASS" "$@"

댓글 없음:

댓글 쓰기