[83880] trunk/dports/java/apache-solr
hum at macports.org
hum at macports.org
Mon Sep 12 07:50:29 PDT 2011
Revision: 83880
http://trac.macports.org/changeset/83880
Author: hum at macports.org
Date: 2011-09-12 07:50:27 -0700 (Mon, 12 Sep 2011)
Log Message:
-----------
apache-solr: enhance the Portfile; add ja variant for Japanese texts; see #31155.
Modified Paths:
--------------
trunk/dports/java/apache-solr/Portfile
trunk/dports/java/apache-solr/files/solr.in
Added Paths:
-----------
trunk/dports/java/apache-solr/files/patch-solr-ja.diff
trunk/dports/java/apache-solr/files/solr-ja.xml
trunk/dports/java/apache-solr/files/velocity.properties
Modified: trunk/dports/java/apache-solr/Portfile
===================================================================
--- trunk/dports/java/apache-solr/Portfile 2011-09-12 14:46:02 UTC (rev 83879)
+++ trunk/dports/java/apache-solr/Portfile 2011-09-12 14:50:27 UTC (rev 83880)
@@ -5,48 +5,89 @@
name apache-solr
version 3.3.0
-categories java search
+revision 1
+categories java textproc
platforms darwin
-maintainers gmail.com:haya10.ito openmaintainer
+maintainers gmail.com:haya10.ito hum openmaintainer
license Apache-2.0
-description The open source enterprise search platform
-
+homepage http://lucene.apache.org/solr/
+description An open source enterprise search platform.
long_description Solr is the popular, blazing fast open source enterprise \
search platform from the Apache Lucene project.
-homepage http://lucene.apache.org/solr/
master_sites apache:lucene/solr/${version}/
-
+extract.suffix .tgz
checksums sha1 e6017419051e5eda9cb222e3b17006eeed682db9 \
rmd160 ef2c2e58d44a9d8786c2fc5b2f19f59bf9e1f70c
-extract.suffix .tgz
+# set the destination paths.
+set java_basepath ${prefix}/share/java
+set solr_destpath ${java_basepath}/${distname}
+set solr_path ${solr_destpath}/example
+set solr_home ${solr_path}/solr
+post-patch {
+ # expand relative paths into absolute ones.
+ reinplace "s|\"\.\./\.\./|\"${solr_destpath}/|g" \
+ ${worksrcpath}/example/solr/conf/solrconfig.xml
+}
+
use_configure no
supported_archs noarch
build {}
-set java_home /System/Library/Frameworks/JavaVM.framework/Versions/1.6/Home
+destroot {
+ # copy the distribution.
+ xinstall -d ${destroot}${java_basepath}
+ copy ${worksrcpath} ${destroot}${java_basepath}
+ # install the solr script.
+ xinstall -m 755 ${filespath}/solr.in ${destroot}${prefix}/bin/solr
+ reinplace "s|@solr_path@|${solr_path}|g" ${destroot}${prefix}/bin/solr
+ reinplace "s|@solr_home@|${solr_home}|g" ${destroot}${prefix}/bin/solr
+}
-pre-configure {
- if {![file exists ${java_home}]} {
- ui_error "Java 1.6 is required, but not located at ${java_home}"
- return -code error "Java 1.6 missing"
+notes "
+To try apache-solr, run 'sudo solr' and open http://localhost:8983/solr/browse.
+To store sample documents, run 'cd ${solr_path}/exampledocs && ./post.sh *.xml'."
+
+# solr home for Japanese configurations.
+set solr_home_ja ${solr_home}-ja
+
+variant ja description {Add Japanese settings with lucene-gosen} {
+ depends_run-append port:lucene-gosen
+ # create Japanese solr home 'solr-ja'.
+ post-extract {
+ copy ${worksrcpath}/example/solr ${worksrcpath}/example/solr-ja
}
-}
+ patchfiles-append patch-solr-ja.diff
+ post-patch {
+ # expand relative paths into absolute ones.
+ reinplace "s|\"\.\./\.\./|\"${solr_destpath}/|g" \
+ ${worksrcpath}/example/solr-ja/conf/solrconfig.xml
+ }
+ post-destroot {
+ # set the lucene-gosen configuration path to config files.
+ foreach config {schema.xml solrconfig.xml} {
+ reinplace "s|@gosen_path@|${java_basepath}/lucene-gosen|g" \
+ ${destroot}${solr_home_ja}/conf/${config}
+ }
+ # install a property file for UTF-8 encoding.
+ copy ${filespath}/velocity.properties ${destroot}${solr_home_ja}/conf
+ # copy a sample Japanese doc for testing.
+ copy ${filespath}/solr-ja.xml ${destroot}${solr_path}/exampledocs
+ # install the solr-ja script.
+ xinstall -m 755 ${filespath}/solr.in ${destroot}${prefix}/bin/solr-ja
+ reinplace "s|@solr_path@|${solr_path}|g" ${destroot}${prefix}/bin/solr-ja
+ reinplace "s|@solr_home@|${solr_home_ja}|g" ${destroot}${prefix}/bin/solr-ja
+ }
+ notes-append "
-set target ${prefix}/share/java/${name}-${version}
-
-destroot {
- xinstall -d ${destroot}[file dirname ${target}]
- copy ${worksrcpath}/example ${destroot}${target}
- xinstall -m 755 ${filespath}/solr.in ${destroot}${prefix}/bin/solr
- reinplace "s|@TARGET@|${target}|g" ${destroot}${prefix}/bin/solr
- reinplace "s|@JAVA_HOME@|${java_home}|g" ${destroot}${prefix}/bin/solr
+For Japanese texts, please run 'sudo solr-ja' instead of 'sudo solr'.
+See ${solr_home_ja}."
}
livecheck.type regex
-livecheck.url http://mirrors.ibiblio.org/pub/mirrors/apache/lucene/solr/
-livecheck.regex {href="([0-9.]+)"}
+livecheck.url http://www.apache.org/dist/lucene/solr/
+livecheck.regex (\[0-9.\]+)\/
Added: trunk/dports/java/apache-solr/files/patch-solr-ja.diff
===================================================================
--- trunk/dports/java/apache-solr/files/patch-solr-ja.diff (rev 0)
+++ trunk/dports/java/apache-solr/files/patch-solr-ja.diff 2011-09-12 14:50:27 UTC (rev 83880)
@@ -0,0 +1,141 @@
+--- example/solr-ja/conf/schema.xml.orig 2011-09-03 23:57:07.000000000 +0900
++++ example/solr-ja/conf/schema.xml 2011-09-05 23:56:02.000000000 +0900
+@@ -467,6 +467,92 @@
+ See http://wiki.apache.org/solr/SpatialSearch
+ -->
+ <fieldtype name="geohash" class="solr.GeoHashField"/>
++
++ <!-- configuration for japanese text, using a morphological analyzer
++ Most possibilities for customization are specified here in the schema.
++
++ Note: you can set the default query operator to be OR, AND, or PHRASE:
++ OR: Use these defaults (autoGeneratePhraseQueries="false", <solrQueryParser defaultOperator="OR"/>
++ In this case Solr works like it does with the English language. The default query is OR,
++ but documents that contain more of the query terms get a special boost. You can probably
++ use a less aggressive stopwords/stoptags in this case, and it's probably a good idea to use
++ enablePositionIncrements=true, so that if a user puts a query in quotes, they get a much more
++ exact phrase query.
++ AND: Set autoGeneratePhraseQueries=false, but set <solrQueryParser defaultOperator="AND"/> in
++ your schema.xml. Note if you do this, you should use a more aggressive stopwords/stoptags
++ list (at least at query-time), otherwise a document might not match simply because it does
++ not contain a prefix or particle. As in the above case, it's probably a good idea to use
++ enablePositionIncrements=true for explicit phrase queries from the user.
++ PHRASE: Set autoGeneratePhraseQueries=true. If you do this, you should probably use both a very
++ aggressive stopwords list, and you should probably also set enablePositionIncrements=false
++ everywhere. Otherwise, even documents that contain the query's phrase in exact order will
++ not match because of slightly different grammatical structure.
++ -->
++ <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
++ <analyzer>
++ <!-- map characters before the tokenizer:
++ Optionally, instead of the JapaneseWidthFactory, you can choose to do the width
++ mappings before the text is sent to the tokenizer.
++ <charFilter class="solr.MappingCharFilterFactory" mapping="@gosen_path@/conf/mapping-japanese.txt"/>
++ -->
++
++ <!-- morphological tokenizer: sets the SURFACE form as the token, but also sets these attributes:
++ BasicFormAttribute, ConjugationAttribute, PartOfSpeechAttribute, PronunciationsAttribute,
++ ReadingsAttribute, and SentenceStartAttribute.
++ -->
++ <tokenizer class="solr.JapaneseTokenizerFactory"/>
++
++ <!-- normalizes CJK width differences:
++ 1. Folds fullwidth ASCII variants into the equivalent basic latin
++ 2. Folds halfwidth Katakana variants into the equivalent kana
++
++ Note: alternatively you can use a MappingCharFilter before the tokenizer for this, but please note
++ that mapping characters can change how Sen tokenizes text.
++ -->
++ <filter class="solr.JapaneseWidthFilterFactory"/>
++
++ <!-- the punctuation filter removes all-punctuation tokens based on Unicode properties.
++ punctuation tokens are tagged as "unknown", and it's better to do this than to remove
++ tokens with an unknown pos (as they might be valuable!). Because this punctuation
++ usually signifies a phrase or sentence boundary, enablePositionIncrements can be
++ used to prevent phrase queries from matching across natural phrase/sentence boundaries -->
++ <filter class="solr.JapanesePunctuationFilterFactory" enablePositionIncrements="true"/>
++
++ <!-- this is a part-of-speech based stopfilter, it removes any tokens that have a certain
++ part of speech. you can set enablePositionIncrements for tighter phrase queries -->
++ <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="@gosen_path@/conf/stoptags_ja.txt" enablePositionIncrements="true"/>
++
++ <!-- a standard stopfilter, to specify specific stopwords. -->
++ <filter class="solr.StopFilterFactory" ignoreCase="true" words="@gosen_path@/conf/stopwords_ja.txt" enablePositionIncrements="true"/>
++
++ <!-- alternatively, instead of using a part-of-speech based stopfilter, you can use a
++ part-of-speech based keepfilter: specifying only the parts of speech you wish to index.
++ anything else will be removed. HOWEVER: this could be a little dangerous, because if
++ we upgrade ipadic they might add some new tags (the tags are fairly specific), and suddenly
++ things that you were indexing before are no longer being indexed. It's recommended to
++ use the part-of-speech based stopfilter above if at all possible, for safety.
++ <filter class="solr.JapanesePartOfSpeechKeepFilterFactory" tags="@gosen_path@/conf/keeptags_ja.txt" enablePositionIncrements="true"/>
++ -->
++
++ <!-- before any stemming/lemmatization, you can protect words from being modified by specifying
++ a protwords.txt.
++ <filter class="solr.KeywordMarkerFilterFactory" protected="@gosen_path@/conf/protwords_ja.txt" ignoreCase="false"/>
++
++ or you can also supply a custom stem dictionary for inflected forms (tab separated). No
++ further stemming/lemmatization will modify this.
++ <filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/>
++ -->
++
++ <!-- the basic form filter converts inflected verbs and adjectives to their dictionary citation form. -->
++ <filter class="solr.JapaneseBasicFormFilterFactory"/>
++
++ <!-- this filter heuristically normalizes katakana forms with a final prolonged sound mark -->
++ <filter class="solr.JapaneseKatakanaStemFilterFactory"/>
++
++ <!-- you might want to lowercase for any english text content you have -->
++ <filter class="solr.LowerCaseFilterFactory"/>
++ </analyzer>
++ </fieldType>
+ </types>
+
+
+@@ -533,7 +619,7 @@
+
+ <!-- catchall field, containing all other searchable text fields (implemented
+ via copyField further on in this schema -->
+- <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
++ <field name="text" type="text_ja" indexed="true" stored="false" multiValued="true"/>
+
+ <!-- catchall text field that indexes tokens both normally and in reverse for efficient
+ leading wildcard queries. -->
+--- example/solr-ja/conf/solrconfig.xml.orig 2011-09-04 00:16:51.000000000 +0900
++++ example/solr-ja/conf/solrconfig.xml 2011-09-06 00:05:40.000000000 +0900
+@@ -80,6 +80,7 @@
+ is found that matches, it will be ignored
+ -->
+ <lib dir="../../contrib/clustering/lib/" />
++ <lib dir="@gosen_path@/lib/" />
+ <lib dir="/total/crap/dir/ignored" />
+ <!-- an exact path can be used to specify a specific file. This
+ will cause a serious error to be logged if it can't be loaded.
+@@ -780,6 +781,7 @@
+ <str name="wt">velocity</str>
+
+ <str name="v.template">browse</str>
++ <str name="v.properties">velocity.properties</str>
+ <str name="v.layout">layout</str>
+ <str name="title">Solritas</str>
+
+--- example/solr-ja/conf/velocity/head.vm.orig 2011-09-03 23:57:07.000000000 +0900
++++ example/solr-ja/conf/velocity/head.vm 2011-09-05 23:56:41.000000000 +0900
+@@ -32,7 +32,7 @@
+ extraParams:{
+ 'terms.prefix': function() { return $("\#q").val();},
+ 'terms.sort': 'count',
+- 'terms.fl': 'name',
++ 'terms.fl': 'text',
+ 'wt': 'velocity',
+ 'v.template': 'suggest'
+ }
+--- example/solr-ja/conf/velocity/suggest.vm.orig 2011-09-03 23:57:07.000000000 +0900
++++ example/solr-ja/conf/velocity/suggest.vm 2011-09-05 23:57:16.000000000 +0900
+@@ -1,3 +1,3 @@
+-#foreach($t in $response.response.terms.name)
++#foreach($t in $response.response.terms.text)
+ $t.key
+ #end
+\ No newline at end of file
Added: trunk/dports/java/apache-solr/files/solr-ja.xml
===================================================================
--- trunk/dports/java/apache-solr/files/solr-ja.xml (rev 0)
+++ trunk/dports/java/apache-solr/files/solr-ja.xml 2011-09-12 14:50:27 UTC (rev 83880)
@@ -0,0 +1,14 @@
+<add>
+<doc>
+ <field name="id">SOLR2000</field>
+ <field name="name">Solr(ソーラ), オープンソースの全文検索システム</field>
+ <field name="manu">Apacheソフトウェア財団</field>
+ <field name="cat">ソフトウェア</field>
+ <field name="cat">検索</field>
+ <field name="features">Luceneを使った先進的な全文検索機能</field>
+ <field name="price">0</field>
+ <field name="popularity">10</field>
+ <field name="inStock">true</field>
+ <field name="incubationdate_dt">2006-01-17T00:00:00.000Z</field>
+</doc>
+</add>
Modified: trunk/dports/java/apache-solr/files/solr.in
===================================================================
--- trunk/dports/java/apache-solr/files/solr.in 2011-09-12 14:46:02 UTC (rev 83879)
+++ trunk/dports/java/apache-solr/files/solr.in 2011-09-12 14:50:27 UTC (rev 83880)
@@ -2,16 +2,22 @@
CMDNAME=`basename "$0"`
-export JAVA_HOME=@JAVA_HOME@
-SOLR=@TARGET@
-
usage() {
- echo "Usage: ${CMDNAME} path/to/config/dir" 1>&2
+ echo "Usage: ${CMDNAME} [-h|--help] [solr_home]" 1>&2
exit 1
}
-if [ -z "$1" ]; then
- usage
-else
- cd ${SOLR} && exec ${JAVA_HOME}/bin/java -Dsolr.solr.home="$1" -jar ${SOLR}/start.jar
-fi
+SOLR_PATH=@solr_path@
+SOLR_HOME=@solr_home@
+
+while test -n "$1"; do
+ case "$1" in
+ -h|--help) usage ;;
+ *) SOLR_HOME="$1"; shift ;;
+ esac
+done
+
+java -Dsolr.clustering.enabled=true \
+ -Dsolr.solr.home="${SOLR_HOME}" \
+ -Djetty.home="${SOLR_PATH}" \
+ -jar ${SOLR_PATH}/start.jar
Added: trunk/dports/java/apache-solr/files/velocity.properties
===================================================================
--- trunk/dports/java/apache-solr/files/velocity.properties (rev 0)
+++ trunk/dports/java/apache-solr/files/velocity.properties 2011-09-12 14:50:27 UTC (rev 83880)
@@ -0,0 +1,2 @@
+input.encoding=UTF-8
+output.encoding=UTF-8
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macports-changes/attachments/20110912/f8c1e334/attachment.html>
More information about the macports-changes
mailing list