[133890] trunk/dports/textproc
hum at macports.org
hum at macports.org
Sat Mar 14 17:40:40 PDT 2015
Revision: 133890
https://trac.macports.org/changeset/133890
Author: hum at macports.org
Date: 2015-03-14 17:40:40 -0700 (Sat, 14 Mar 2015)
Log Message:
-----------
New port: mecab-ipadic-neologd @ 20150315 - Neologism dictionary for MeCab
Added Paths:
-----------
trunk/dports/textproc/mecab-ipadic-neologd/
trunk/dports/textproc/mecab-ipadic-neologd/Portfile
trunk/dports/textproc/mecab-ipadic-neologd/files/
trunk/dports/textproc/mecab-ipadic-neologd/files/patch-make.sh.diff
trunk/dports/textproc/mecab-ipadic-neologd/files/patch-test.sh.diff
Added: trunk/dports/textproc/mecab-ipadic-neologd/Portfile
===================================================================
--- trunk/dports/textproc/mecab-ipadic-neologd/Portfile (rev 0)
+++ trunk/dports/textproc/mecab-ipadic-neologd/Portfile 2015-03-15 00:40:40 UTC (rev 133890)
@@ -0,0 +1,60 @@
+# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4
+# $Id$
+
+PortSystem 1.0
+PortGroup github 1.0
+
+github.setup neologd mecab-ipadic-neologd ae5886767977c5baa0d34017928d9cd752d0330d
+version 20150315
+set dic_version 20150313
+categories textproc japanese
+license Apache-2
+platforms darwin
+supported_archs noarch
+maintainers hum openmaintainer
+
+description Neologism dictionary for MeCab
+long_description mecab-ipadic-neologd is customized system dictionary for \
+ MeCab. This dictionary includes many neologisms (new word), \
+ which are extracted from many language resources on the Web. \
+ When you analyze the Web documents, it's better to use this \
+ system dictionary and default one (ipadic) together.
+
+dist_subdir mecab
+
+checksums rmd160 07968c2d46b4ac2e065e5f6e08708641fb481d8c \
+ sha256 1354ea3752ad2db58a7ef6878f9dae49d76e5f88516e032293858bb2743096ca
+
+depends_lib port:mecab-base
+
+depends_build port:mecab-ipadic-utf8 \
+ port:libiconv \
+ port:xz
+
+patchfiles patch-make.sh.diff \
+ patch-test.sh.diff
+
+post-patch {
+ reinplace "s|@PREFIX@|${prefix}|g" ${worksrcpath}/libexec/test-mecab-ipadic-neologd.sh
+}
+
+use_configure no
+
+build {
+ system -W ${worksrcpath} ./libexec/make-mecab-ipadic-neologd.sh
+ system -W ${worksrcpath} ./libexec/test-mecab-ipadic-neologd.sh
+}
+
+destroot.dir ${worksrcpath}/build/mecab-ipadic-2.7.0-20070801-neologd-${dic_version}
+
+post-destroot {
+ move ${destroot}${prefix}/lib/mecab/dic/${name} \
+ ${destroot}${prefix}/lib/mecab/dic/neologd-utf8
+ # install additional documents
+ set docdir ${prefix}/share/doc/${name}
+ xinstall -d ${destroot}${docdir}
+ xinstall -m 644 -W ${worksrcpath} \
+ COPYING README.ja.md README.md \
+ ${destroot}${docdir}
+ xinstall -m 644 ${destroot.dir}/COPYING ${destroot}${docdir}/COPYING-ipadic
+}
Added: trunk/dports/textproc/mecab-ipadic-neologd/files/patch-make.sh.diff
===================================================================
--- trunk/dports/textproc/mecab-ipadic-neologd/files/patch-make.sh.diff (rev 0)
+++ trunk/dports/textproc/mecab-ipadic-neologd/files/patch-make.sh.diff 2015-03-15 00:40:40 UTC (rev 133890)
@@ -0,0 +1,19 @@
+--- libexec/make-mecab-ipadic-neologd.sh.orig 2015-03-15 09:34:32.000000000 +0900
++++ libexec/make-mecab-ipadic-neologd.sh 2015-03-15 09:36:27.000000000 +0900
+@@ -86,11 +86,11 @@
+ echo "${ECHO_PREFIX} Encode the character encoding of system dictionary resources from EUC_JP to UTF-8"
+ sed -i -e "s|${MECAB_DIC_DIR}/ipadic|${INSTALL_DIR_PATH}|p" ${NEOLOGD_DIC_DIR}/Makefile
+
+-find ${NEOLOGD_DIC_DIR} -type f | xargs file | grep ".csv" | cut -d: -f1 | xargs -t -I{} ${BASEDIR}/../libexec/iconv_euc_to_utf8.sh {}
+-find ${NEOLOGD_DIC_DIR} -type f | xargs file | grep ".csv" | grep -v ".utf8" | cut -d: -f1 | xargs -t -I{} rm {}
+-find ${NEOLOGD_DIC_DIR} -type f | xargs file | grep ".def" | cut -d: -f1 | xargs -t -I{} ${BASEDIR}/../libexec/iconv_euc_to_utf8.sh {}
+-find ${NEOLOGD_DIC_DIR} -type f | xargs file | grep ".def" | grep -v ".utf8" | cut -d: -f1 | xargs -t -I{} rm {}
+-find ${NEOLOGD_DIC_DIR} -type f | xargs file | grep ".utf8" | cut -d: -f1 | sed -e "s|.utf8||" | xargs -t -I{} mv {}.utf8 {}
++find . -type f | xargs file | grep ".csv" | cut -d: -f1 | xargs -t -I{} ${BASEDIR}/../libexec/iconv_euc_to_utf8.sh {}
++find . -type f | xargs file | grep ".csv" | grep -v ".utf8" | cut -d: -f1 | xargs -t -I{} rm {}
++find . -type f | xargs file | grep ".def" | cut -d: -f1 | xargs -t -I{} ${BASEDIR}/../libexec/iconv_euc_to_utf8.sh {}
++find . -type f | xargs file | grep ".def" | grep -v ".utf8" | cut -d: -f1 | xargs -t -I{} rm {}
++find . -type f | xargs file | grep ".utf8" | cut -d: -f1 | sed -e "s|.utf8||" | xargs -t -I{} mv {}.utf8 {}
+
+ echo "${ECHO_PREFIX} Copy user dictionary resource"
+ SEED_FILE_NAME=mecab-user-dict-seed.${YMD}.csv
Added: trunk/dports/textproc/mecab-ipadic-neologd/files/patch-test.sh.diff
===================================================================
--- trunk/dports/textproc/mecab-ipadic-neologd/files/patch-test.sh.diff (rev 0)
+++ trunk/dports/textproc/mecab-ipadic-neologd/files/patch-test.sh.diff 2015-03-15 00:40:40 UTC (rev 133890)
@@ -0,0 +1,65 @@
+--- libexec/test-mecab-ipadic-neologd.sh.orig 2015-03-09 19:36:37.000000000 +0900
++++ libexec/test-mecab-ipadic-neologd.sh 2015-03-14 23:47:54.000000000 +0900
+@@ -21,8 +21,8 @@
+
+ echo "$ECHO_PREFIX Start.."
+
+-echo "$ECHO_PREFIX Replace timestamp from 'git clone' date to 'git commit' date"
+-${BASEDIR}/../misc/git-set-file-times
++#echo "$ECHO_PREFIX Replace timestamp from 'git clone' date to 'git commit' date"
++#${BASEDIR}/../misc/git-set-file-times
+
+ YMD=`ls -c \`find ${BASEDIR}/../seed/mecab-user-dict-seed.*.csv.xz\` | head -1 | egrep -o '[0-9]{8}' | tail -1`
+ if [ ! -e ${BASEDIR}/../build/mecab-ipadic-2.7.0-20070801-neologd-${YMD} ]; then
+@@ -36,36 +36,37 @@
+
+ echo "$ECHO_PREFIX Get buzz phrases"
+
+-curl http://searchranking.yahoo.co.jp/realtime_buzz/ -o "/tmp/realtime_buzz.html"
+-sed -i -e "/\n/d" /tmp/realtime_buzz.html
+-cat /tmp/realtime_buzz.html | perl -ne '$l = $_; if ($l =~ m|<h3><a href="http://rdsig\.yahoo\.co\.jp.+?">(.+)</a></h3>|g){ print $1."\n";}' > /tmp/buzz_phrase
++mkdir -p tmp
++curl http://searchranking.yahoo.co.jp/realtime_buzz/ -o "./tmp/realtime_buzz.html"
++sed -i -e "/\n/d" ./tmp/realtime_buzz.html
++cat ./tmp/realtime_buzz.html | perl -ne '$l = $_; if ($l =~ m|<h3><a href="http://rdsig\.yahoo\.co\.jp.+?">(.+)</a></h3>|g){ print $1."\n";}' > ./tmp/buzz_phrase
+
+-PHRASE_FILE=/tmp/buzz_phrase
++PHRASE_FILE=./tmp/buzz_phrase
+ if [ ! -s ${PHRASE_FILE} ]; then
+ PHRASE_FILE=""#${BASEDIR}/../misc/buzz_phrase_201402181610
+ fi
+
+ echo "$ECHO_PREFIX Get difference between default system dictionary and mecab-ipadic-neologd"
+
+-cat /tmp/buzz_phrase| mecab -Owakati > /tmp/buzz_phrase_tokenized_using_defdic
+-cat /tmp/buzz_phrase| mecab -Owakati -d ${MECAB_DIC_DIR} > /tmp/buzz_phrase_tokenized_using_neologismdic
+-/usr/bin/diff -y -W60 --side-by-side --suppress-common-lines /tmp/buzz_phrase_tokenized_using_defdic /tmp/buzz_phrase_tokenized_using_neologismdic > /tmp/buzz_phrase_tokenized_diff
++cat ./tmp/buzz_phrase| mecab -Owakati -d @PREFIX@/lib/mecab/dic/ipadic-utf8 > ./tmp/buzz_phrase_tokenized_using_defdic
++cat ./tmp/buzz_phrase| mecab -Owakati -d ${MECAB_DIC_DIR} > ./tmp/buzz_phrase_tokenized_using_neologismdic
++/usr/bin/diff -y -W60 --side-by-side --suppress-common-lines ./tmp/buzz_phrase_tokenized_using_defdic ./tmp/buzz_phrase_tokenized_using_neologismdic > ./tmp/buzz_phrase_tokenized_diff
+
+-if [ -s /tmp/buzz_phrase_tokenized_diff ]; then
++if [ -s ./tmp/buzz_phrase_tokenized_diff ]; then
+ echo "$ECHO_PREFIX Tokenize phrase using default system dictionary"
+- echo "default system dictonary" > /tmp/buzz_phrase_tokenized_using_defdic
+- cat /tmp/buzz_phrase| mecab -Owakati >> /tmp/buzz_phrase_tokenized_using_defdic
++ echo "default system dictonary" > ./tmp/buzz_phrase_tokenized_using_defdic
++ cat ./tmp/buzz_phrase| mecab -Owakati -d @PREFIX@/lib/mecab/dic/ipadic-utf8 >> ./tmp/buzz_phrase_tokenized_using_defdic
+
+ echo "$ECHO_PREFIX Tokenize phrase using mecab-ipadic-neologd"
+- echo "mecab-ipadic-neologd" > /tmp/buzz_phrase_tokenized_using_neologismdic
+- cat /tmp/buzz_phrase| mecab -Owakati -d ${MECAB_DIC_DIR} >> /tmp/buzz_phrase_tokenized_using_neologismdic
++ echo "mecab-ipadic-neologd" > ./tmp/buzz_phrase_tokenized_using_neologismdic
++ cat ./tmp/buzz_phrase| mecab -Owakati -d ${MECAB_DIC_DIR} >> ./tmp/buzz_phrase_tokenized_using_neologismdic
+
+ echo "$ECHO_PREFIX Get result of diff"
+- /usr/bin/diff -y -W60 --side-by-side --suppress-common-lines /tmp/buzz_phrase_tokenized_using_defdic /tmp/buzz_phrase_tokenized_using_neologismdic > /tmp/buzz_phrase_tokenized_diff
++ /usr/bin/diff -y -W60 --side-by-side --suppress-common-lines ./tmp/buzz_phrase_tokenized_using_defdic ./tmp/buzz_phrase_tokenized_using_neologismdic > ./tmp/buzz_phrase_tokenized_diff
+
+ echo "$ECHO_PREFIX Please check difference between default system dictionary and mecab-ipadic-neologd"
+ echo ""
+- cat /tmp/buzz_phrase_tokenized_diff
++ cat ./tmp/buzz_phrase_tokenized_diff
+ echo ""
+ else
+ echo "$ECHO_PREFIX Something wrong. You shouldn't install mecab-ipadic-neologd yet."
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.macosforge.org/pipermail/macports-changes/attachments/20150314/89cb8935/attachment-0001.html>
More information about the macports-changes
mailing list