[133890] trunk/dports/textproc

hum at macports.org hum at macports.org
Sat Mar 14 17:40:40 PDT 2015


Revision: 133890
          https://trac.macports.org/changeset/133890
Author:   hum at macports.org
Date:     2015-03-14 17:40:40 -0700 (Sat, 14 Mar 2015)
Log Message:
-----------
New port: mecab-ipadic-neologd @ 20150315 - Neologism dictionary for MeCab

Added Paths:
-----------
    trunk/dports/textproc/mecab-ipadic-neologd/
    trunk/dports/textproc/mecab-ipadic-neologd/Portfile
    trunk/dports/textproc/mecab-ipadic-neologd/files/
    trunk/dports/textproc/mecab-ipadic-neologd/files/patch-make.sh.diff
    trunk/dports/textproc/mecab-ipadic-neologd/files/patch-test.sh.diff

Added: trunk/dports/textproc/mecab-ipadic-neologd/Portfile
===================================================================
--- trunk/dports/textproc/mecab-ipadic-neologd/Portfile	                        (rev 0)
+++ trunk/dports/textproc/mecab-ipadic-neologd/Portfile	2015-03-15 00:40:40 UTC (rev 133890)
@@ -0,0 +1,60 @@
+# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4
+# $Id$
+
+PortSystem          1.0
+PortGroup           github 1.0
+
+github.setup        neologd mecab-ipadic-neologd ae5886767977c5baa0d34017928d9cd752d0330d
+version             20150315
+set dic_version     20150313
+categories          textproc japanese
+license             Apache-2
+platforms           darwin
+supported_archs     noarch
+maintainers         hum openmaintainer
+
+description         Neologism dictionary for MeCab
+long_description    mecab-ipadic-neologd is customized system dictionary for \
+                    MeCab. This dictionary includes many neologisms (new word), \
+                    which are \extracted from many language resources on the Web. \
+                    When you analyze the Web documents, it's better to use this \
+                    system dictionary and default one (ipadic) together.
+
+dist_subdir         mecab
+
+checksums           rmd160  07968c2d46b4ac2e065e5f6e08708641fb481d8c \
+                    sha256  1354ea3752ad2db58a7ef6878f9dae49d76e5f88516e032293858bb2743096ca
+
+depends_lib         port:mecab-base
+
+depends_build       port:mecab-ipadic-utf8 \
+                    port:libiconv \
+                    port:xz
+
+patchfiles          patch-make.sh.diff \
+                    patch-test.sh.diff
+
+post-patch {
+    reinplace "s|@PREFIX@|${prefix}|g" ${worksrcpath}/libexec/test-mecab-ipadic-neologd.sh
+}
+
+use_configure       no
+
+build {
+    system -W ${worksrcpath} ./libexec/make-mecab-ipadic-neologd.sh
+    system -W ${worksrcpath} ./libexec/test-mecab-ipadic-neologd.sh
+}
+
+destroot.dir        ${worksrcpath}/build/mecab-ipadic-2.7.0-20070801-neologd-${dic_version}
+
+post-destroot {
+    move ${destroot}${prefix}/lib/mecab/dic/${name} \
+        ${destroot}${prefix}/lib/mecab/dic/neologd-utf8
+    # install additional documents
+    set docdir      ${prefix}/share/doc/${name}
+    xinstall -d ${destroot}${docdir}
+    xinstall -m 644 -W ${worksrcpath} \
+        COPYING README.ja.md README.md \
+        ${destroot}${docdir}
+    xinstall -m 644 ${destroot.dir}/COPYING ${destroot}${docdir}/COPYING-ipadic
+} 

Added: trunk/dports/textproc/mecab-ipadic-neologd/files/patch-make.sh.diff
===================================================================
--- trunk/dports/textproc/mecab-ipadic-neologd/files/patch-make.sh.diff	                        (rev 0)
+++ trunk/dports/textproc/mecab-ipadic-neologd/files/patch-make.sh.diff	2015-03-15 00:40:40 UTC (rev 133890)
@@ -0,0 +1,19 @@
+--- libexec/make-mecab-ipadic-neologd.sh.orig	2015-03-15 09:34:32.000000000 +0900
++++ libexec/make-mecab-ipadic-neologd.sh	2015-03-15 09:36:27.000000000 +0900
+@@ -86,11 +86,11 @@
+ echo "${ECHO_PREFIX} Encode the character encoding of system dictionary resources from EUC_JP to UTF-8"
+ sed -i -e "s|${MECAB_DIC_DIR}/ipadic|${INSTALL_DIR_PATH}|p" ${NEOLOGD_DIC_DIR}/Makefile
+ 
+-find ${NEOLOGD_DIC_DIR} -type f | xargs file | grep ".csv" | cut -d: -f1 | xargs -t -I{} ${BASEDIR}/../libexec/iconv_euc_to_utf8.sh {}
+-find ${NEOLOGD_DIC_DIR} -type f | xargs file | grep ".csv" | grep -v ".utf8" | cut -d: -f1 | xargs -t -I{} rm {}
+-find ${NEOLOGD_DIC_DIR} -type f | xargs file | grep ".def" | cut -d: -f1 | xargs -t -I{} ${BASEDIR}/../libexec/iconv_euc_to_utf8.sh {}
+-find ${NEOLOGD_DIC_DIR} -type f | xargs file | grep ".def" | grep -v ".utf8" | cut -d: -f1 | xargs -t -I{} rm {}
+-find ${NEOLOGD_DIC_DIR} -type f | xargs file | grep  ".utf8" | cut -d: -f1 |  sed -e "s|.utf8||" |  xargs -t -I{} mv {}.utf8 {}
++find . -type f | xargs file | grep ".csv" | cut -d: -f1 | xargs -t -I{} ${BASEDIR}/../libexec/iconv_euc_to_utf8.sh {}
++find . -type f | xargs file | grep ".csv" | grep -v ".utf8" | cut -d: -f1 | xargs -t -I{} rm {}
++find . -type f | xargs file | grep ".def" | cut -d: -f1 | xargs -t -I{} ${BASEDIR}/../libexec/iconv_euc_to_utf8.sh {}
++find . -type f | xargs file | grep ".def" | grep -v ".utf8" | cut -d: -f1 | xargs -t -I{} rm {}
++find . -type f | xargs file | grep  ".utf8" | cut -d: -f1 |  sed -e "s|.utf8||" |  xargs -t -I{} mv {}.utf8 {}
+ 
+ echo "${ECHO_PREFIX} Copy user dictionary resource"
+ SEED_FILE_NAME=mecab-user-dict-seed.${YMD}.csv

Added: trunk/dports/textproc/mecab-ipadic-neologd/files/patch-test.sh.diff
===================================================================
--- trunk/dports/textproc/mecab-ipadic-neologd/files/patch-test.sh.diff	                        (rev 0)
+++ trunk/dports/textproc/mecab-ipadic-neologd/files/patch-test.sh.diff	2015-03-15 00:40:40 UTC (rev 133890)
@@ -0,0 +1,65 @@
+--- libexec/test-mecab-ipadic-neologd.sh.orig	2015-03-09 19:36:37.000000000 +0900
++++ libexec/test-mecab-ipadic-neologd.sh	2015-03-14 23:47:54.000000000 +0900
+@@ -21,8 +21,8 @@
+ 
+ echo "$ECHO_PREFIX Start.."
+ 
+-echo "$ECHO_PREFIX Replace timestamp from 'git clone' date to 'git commit' date"
+-${BASEDIR}/../misc/git-set-file-times
++#echo "$ECHO_PREFIX Replace timestamp from 'git clone' date to 'git commit' date"
++#${BASEDIR}/../misc/git-set-file-times
+ 
+ YMD=`ls -c \`find ${BASEDIR}/../seed/mecab-user-dict-seed.*.csv.xz\` | head -1 | egrep -o '[0-9]{8}' | tail -1`
+ if [ ! -e ${BASEDIR}/../build/mecab-ipadic-2.7.0-20070801-neologd-${YMD} ]; then
+@@ -36,36 +36,37 @@
+ 
+ echo "$ECHO_PREFIX Get buzz phrases"
+ 
+-curl http://searchranking.yahoo.co.jp/realtime_buzz/ -o "/tmp/realtime_buzz.html"
+-sed -i -e "/\n/d" /tmp/realtime_buzz.html
+-cat /tmp/realtime_buzz.html | perl -ne '$l = $_;  if ($l =~ m|<h3><a href="http://rdsig\.yahoo\.co\.jp.+?">(.+)</a></h3>|g){ print $1."\n";}' > /tmp/buzz_phrase
++mkdir -p tmp
++curl http://searchranking.yahoo.co.jp/realtime_buzz/ -o "./tmp/realtime_buzz.html"
++sed -i -e "/\n/d" ./tmp/realtime_buzz.html
++cat ./tmp/realtime_buzz.html | perl -ne '$l = $_;  if ($l =~ m|<h3><a href="http://rdsig\.yahoo\.co\.jp.+?">(.+)</a></h3>|g){ print $1."\n";}' > ./tmp/buzz_phrase
+ 
+-PHRASE_FILE=/tmp/buzz_phrase
++PHRASE_FILE=./tmp/buzz_phrase
+ if [ ! -s ${PHRASE_FILE} ]; then
+    PHRASE_FILE=""#${BASEDIR}/../misc/buzz_phrase_201402181610
+ fi
+ 
+ echo "$ECHO_PREFIX Get difference between default system dictionary and mecab-ipadic-neologd"
+ 
+-cat /tmp/buzz_phrase| mecab -Owakati > /tmp/buzz_phrase_tokenized_using_defdic
+-cat /tmp/buzz_phrase| mecab -Owakati -d ${MECAB_DIC_DIR} > /tmp/buzz_phrase_tokenized_using_neologismdic
+-/usr/bin/diff -y -W60 --side-by-side --suppress-common-lines /tmp/buzz_phrase_tokenized_using_defdic /tmp/buzz_phrase_tokenized_using_neologismdic > /tmp/buzz_phrase_tokenized_diff
++cat ./tmp/buzz_phrase| mecab -Owakati -d @PREFIX@/lib/mecab/dic/ipadic-utf8 > ./tmp/buzz_phrase_tokenized_using_defdic
++cat ./tmp/buzz_phrase| mecab -Owakati -d ${MECAB_DIC_DIR} > ./tmp/buzz_phrase_tokenized_using_neologismdic
++/usr/bin/diff -y -W60 --side-by-side --suppress-common-lines ./tmp/buzz_phrase_tokenized_using_defdic ./tmp/buzz_phrase_tokenized_using_neologismdic > ./tmp/buzz_phrase_tokenized_diff
+ 
+-if [ -s /tmp/buzz_phrase_tokenized_diff ]; then
++if [ -s ./tmp/buzz_phrase_tokenized_diff ]; then
+     echo "$ECHO_PREFIX Tokenize phrase using default system dictionary"
+-    echo "default system dictonary" > /tmp/buzz_phrase_tokenized_using_defdic
+-    cat /tmp/buzz_phrase| mecab -Owakati >> /tmp/buzz_phrase_tokenized_using_defdic
++    echo "default system dictonary" > ./tmp/buzz_phrase_tokenized_using_defdic
++    cat ./tmp/buzz_phrase| mecab -Owakati -d @PREFIX@/lib/mecab/dic/ipadic-utf8 >> ./tmp/buzz_phrase_tokenized_using_defdic
+ 
+     echo "$ECHO_PREFIX Tokenize phrase using mecab-ipadic-neologd"
+-    echo "mecab-ipadic-neologd" > /tmp/buzz_phrase_tokenized_using_neologismdic
+-    cat /tmp/buzz_phrase| mecab -Owakati -d ${MECAB_DIC_DIR} >> /tmp/buzz_phrase_tokenized_using_neologismdic
++    echo "mecab-ipadic-neologd" > ./tmp/buzz_phrase_tokenized_using_neologismdic
++    cat ./tmp/buzz_phrase| mecab -Owakati -d ${MECAB_DIC_DIR} >> ./tmp/buzz_phrase_tokenized_using_neologismdic
+ 
+     echo "$ECHO_PREFIX Get result of diff"
+-    /usr/bin/diff -y -W60 --side-by-side --suppress-common-lines /tmp/buzz_phrase_tokenized_using_defdic /tmp/buzz_phrase_tokenized_using_neologismdic > /tmp/buzz_phrase_tokenized_diff
++    /usr/bin/diff -y -W60 --side-by-side --suppress-common-lines ./tmp/buzz_phrase_tokenized_using_defdic ./tmp/buzz_phrase_tokenized_using_neologismdic > ./tmp/buzz_phrase_tokenized_diff
+ 
+     echo "$ECHO_PREFIX Please check difference between default system dictionary and mecab-ipadic-neologd"
+     echo ""
+-    cat /tmp/buzz_phrase_tokenized_diff
++    cat ./tmp/buzz_phrase_tokenized_diff
+     echo ""
+ else
+     echo "$ECHO_PREFIX Something wrong. You shouldn't install mecab-ipadic-neologd yet."
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.macosforge.org/pipermail/macports-changes/attachments/20150314/89cb8935/attachment-0001.html>


More information about the macports-changes mailing list