[99780] trunk/dports/perl
hum at macports.org
hum at macports.org
Sat Nov 17 07:22:50 PST 2012
Revision: 99780
https://trac.macports.org/changeset/99780
Author: hum at macports.org
Date: 2012-11-17 07:22:50 -0800 (Sat, 17 Nov 2012)
Log Message:
-----------
New port: p5-termextract @4.08 - a Perl module to extract technical terms from texts.
Added Paths:
-----------
trunk/dports/perl/p5-termextract/
trunk/dports/perl/p5-termextract/Portfile
trunk/dports/perl/p5-termextract/files/
trunk/dports/perl/p5-termextract/files/patch-utf8.diff
Added: trunk/dports/perl/p5-termextract/Portfile
===================================================================
--- trunk/dports/perl/p5-termextract/Portfile (rev 0)
+++ trunk/dports/perl/p5-termextract/Portfile 2012-11-17 15:22:50 UTC (rev 99780)
@@ -0,0 +1,73 @@
+# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4
+# $Id$
+
+PortSystem 1.0
+PortGroup perl5 1.0
+
+perl5.branches 5.8 5.10 5.12 5.14 5.16
+perl5.setup TermExtract 4.08
+categories-append textproc japanese
+maintainers hum openmaintainer
+
+description a Perl module to extract technical terms from texts
+long_description ${name} is ${description}.
+
+homepage http://gensen.dl.itc.u-tokyo.ac.jp/termextract.html
+platforms darwin
+license Permissive
+
+master_sites http://gensen.dl.itc.u-tokyo.ac.jp/soft/
+checksums rmd160 cf3a4100207952f893be313515047f510136abbd \
+ sha256 fc01bcb53d0fc9541142e38541d408151315a75d18147151173e3e5e490d7ef8
+
+distname ${perl5.module}-[strsed ${perl5.moduleversion} {g/\./_/}]
+
+depends_build port:nkf
+
+if {${perl5.major} != ""} {
+ post-extract {
+ # Seed the UTF-8 variants (TermExtract/JapanesePlainText.pm, SampleScripts/UNIX/ex_JPT.pl) by copying the EUC originals; patch-utf8.diff then edits these copies.
+ copy ${worksrcpath}/TermExtract/JapanesePlainTextEUC.pm \
+ ${worksrcpath}/TermExtract/JapanesePlainText.pm
+ copy ${worksrcpath}/SampleScripts/UNIX/ex_JPTE.pl \
+ ${worksrcpath}/SampleScripts/UNIX/ex_JPT.pl
+ }
+
+ patchfiles-append patch-utf8.diff
+
+ post-patch {
+ # Re-encode the chasen, mecab and JPT sample scripts in place as UTF-8 (nkf -w), running nkf from SampleScripts/UNIX.
+ foreach f {ex_chasen.pl ex_mecab.pl ex_JPT.pl} {
+ system -W ${worksrcpath}/SampleScripts/UNIX "nkf --in-place -w ${f}"
+ }
+ # Replace the hard-coded /usr/local/bin/perl with ${perl5.bin} in every file under TermExtract/.
+ foreach f [glob ${worksrcpath}/TermExtract/*] {
+ reinplace -locale C "s|/usr/local/bin/perl|${perl5.bin}|g" ${f}
+ }
+ }
+
+ post-destroot {
+ # Install the sample scripts under share/termextract, pointing each at ${perl5.bin} and making it executable.
+ set scripts_dir ${destroot}${prefix}/share/termextract
+ copy ${worksrcpath}/SampleScripts/UNIX ${scripts_dir}
+ foreach f [glob ${scripts_dir}/*] {
+ reinplace -locale C "s|/usr/local/bin/perl|${perl5.bin}|g" ${f}
+ file attributes ${f} -permissions 0755
+ }
+ # Install the HTML documents under share/doc/termextract, re-encoded as UTF-8 with a matching charset <meta> tag.
+ xinstall -d ${destroot}${prefix}/share/doc
+ set doc_dir ${destroot}${prefix}/share/doc/termextract
+ copy ${worksrcpath}/HTML ${doc_dir}
+ foreach f [glob ${doc_dir}/*] {
+ system -W ${doc_dir} "nkf --in-place -w ${f}"
+ reinplace -locale C \
+ "s|<head>|<head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />|" \
+ ${f}
+ }
+ }
+}
+
+livecheck.type regex
+livecheck.url ${homepage}
+livecheck.version [strsed ${perl5.moduleversion} {g/\./_/}]
+livecheck.regex ${perl5.module}-(\[0-9_\]+)\\.
Property changes on: trunk/dports/perl/p5-termextract/Portfile
___________________________________________________________________
Added: svn:keywords
+ Id
Added: svn:eol-style
+ native
Added: trunk/dports/perl/p5-termextract/files/patch-utf8.diff
===================================================================
--- trunk/dports/perl/p5-termextract/files/patch-utf8.diff (rev 0)
+++ trunk/dports/perl/p5-termextract/files/patch-utf8.diff 2012-11-17 15:22:50 UTC (rev 99780)
@@ -0,0 +1,163 @@
+--- TermExtract/JapanesePlainText.pm.orig 2005-07-14 18:34:22.000000000 +0900
++++ TermExtract/JapanesePlainText.pm 2012-11-17 21:04:37.000000000 +0900
+@@ -1,8 +1,9 @@
+-package TermExtract::JapanesePlainTextEUC;
++package TermExtract::JapanesePlainText;
+ use TermExtract::Calc_Imp;
+
+ use strict;
+ use Exporter ();
++use Encode 'from_to';
+ use vars qw(@ISA $VERSION @EXPORT);
+
+ @ISA = qw(TermExtract::Calc_Imp Exporter);
+@@ -36,6 +37,8 @@
+ $data = <IN>;
+ close IN;
+ }
++ # convert utf-8 to eucjp.
++ Encode::from_to($data, 'UTF-8', 'EUC-JP');
+
+ foreach my $morph ((split /\n/, $data)) {
+ chomp $morph;
+@@ -43,6 +46,10 @@
+ my $terms = get_katakana_kanji($morph);
+ foreach my $cmp_noun (@$terms) {
+ next if @$cmp_noun < 2;
++ # convert eucjp to utf-8.
++ foreach my $elem (@$cmp_noun) {
++ Encode::from_to($elem, 'EUC-JP', 'UTF-8');
++ }
+ $cmp_noun_list{ join ' ', @$cmp_noun }++ if $$cmp_noun[0];
+ }
+ }
+@@ -135,16 +142,16 @@
+
+ =head1 NAME
+
+- TermExtract::JapanesePlainTextEUC
+- -- \xC0\xEC\xCC\xE7\xCDѸ켫ư\xC3\xEA\xBDХ⥸\xA5塼\xA5\xEB\xA1\xCA\xCF\xC2ʸ\xA1֥\xAB\xA5\xBF\xA5\xAB\xA5ʡ\xA6\xB4\xC1\xBB\xFA\xC3\xEA\xBD\xD0\xCA\xFD\xBC\xB0\xA1\xD7EUC\xC8ǡ\xCB
++ TermExtract::JapanesePlainText
++ -- \xC0\xEC\xCC\xE7\xCDѸ켫ư\xC3\xEA\xBDХ⥸\xA5塼\xA5\xEB\xA1\xCA\xCF\xC2ʸ\xA1֥\xAB\xA5\xBF\xA5\xAB\xA5ʡ\xA6\xB4\xC1\xBB\xFA\xC3\xEA\xBD\xD0\xCA\xFD\xBC\xB0\xA1\xD7UTF-8\xC8ǡ\xCB
+
+ =head1 SYNOPSIS
+
+- use TermExtract::JapanesePlainTextEUC;
++ use TermExtract::JapanesePlainText;
+
+ =head1 DESCRIPTION
+
+- \xC6\xFC\xCBܸ\xEC\xA4Υƥ\xAD\xA5\xB9\xA5ȥǡ\xBC\xA5\xBF\xA1\xCAEUC)\xA4\xAB\xA4餽\xA4Τޤ\xDE\xC0\xEC\xCC\xE7\xCDѸ\xEC\xA4\xF2\xC3\xEA\xBDФ\xB9\xA4\xEB\xA5ץ\x{D970}\xA5\xE9\xA5ࡣ
++ \xC6\xFC\xCBܸ\xEC\xA4Υƥ\xAD\xA5\xB9\xA5ȥǡ\xBC\xA5\xBF\xA1\xCAUTF-8)\xA4\xAB\xA4餽\xA4Τޤ\xDE\xC0\xEC\xCC\xE7\xCDѸ\xEC\xA4\xF2\xC3\xEA\xBDФ\xB9\xA4\xEB\xA5ץ\x{D970}\xA5\xE9\xA5ࡣ
+
+ \xC5\xF6\xA5⥸\xA5塼\xA5\xEB\xA4λ\xC8\xCD\xD1ˡ\xA4ˤĤ\xA4\xA4Ƥϡ\xA2\xBFƥ\xAF\xA5饹\xA1\xCATermExtract::Calc_Imp)\xA4\xAB\xA1\xA2
+ \xB0ʲ\xBC\xA4Υ\xB5\xA5\xF3\xA5ץ륹\xA5\xAF\xA5\xEA\xA5ץȤȤΤ\xB3\xA4ȡ\xA3
+@@ -154,10 +161,10 @@
+ #!/usr/local/bin/perl -w
+
+ #
+- # ex_JPTE.pl
++ # ex_JPT.pl
+ #
+ # ɸ\xBD\xE0\xBD\xD0\xCEϤ\xCB\xC0\xEC\xCC\xE7\xCDѸ\xEC\xA4Ȥ\xBD\xA4ν\xC5\xCD\xD7\xC5٤\xF2\xCA֤\xB9\xA5ץ\x{D970}\xA5\xE9\xA5\xE0
+- # \xCF\xC2ʸ\xA1֥\xAB\xA5\xBF\xA5\xAB\xA5ʡ\xA6\xB4\xC1\xBB\xFA\xC3\xEA\xBD\xD0\xCA\xFD\xBC\xB0\xA1\xD7EUC\xC8\xC7
++ # \xCF\xC2ʸ\xA1֥\xAB\xA5\xBF\xA5\xAB\xA5ʡ\xA6\xB4\xC1\xBB\xFA\xC3\xEA\xBD\xD0\xCA\xFD\xBC\xB0\xA1\xD7UTF-8\xC8\xC7
+ #
+ # version 0.05
+ #
+@@ -165,7 +172,7 @@
+
+ use TermExtract::JapanesePlainTextEUC;
+ #use strict;
+- my $data = new TermExtract::JapanesePlainTextEUC;
++ my $data = new TermExtract::JapanesePlainText;
+ my $InputFile = "JPTE_out.txt"; # \xC6\xFE\xCEϥե\xA1\xA5\xA4\xA5\xEB
+
+ # \xA5ץ\x{D97B}\xA5\xB9\xA4ΰ۾ェλ\xBB\xFE\xBD\xE8\xCD\xFD
+--- TermExtract/Chasen.pm.orig 2005-07-19 09:18:40.000000000 +0900
++++ TermExtract/Chasen.pm 2012-11-17 21:35:07.000000000 +0900
+@@ -3,6 +3,7 @@
+
+ use strict;
+ use Exporter ();
++use Encode 'from_to';
+ use vars qw(@ISA $VERSION @EXPORT);
+
+ @ISA = qw(TermExtract::Calc_Imp Exporter);
+@@ -48,6 +49,10 @@
+ $end =~ /^\s+$/ || $must)
+ { pop @$terms }
+ }
++ # convert eucjp to utf-8.
++ foreach my $elem (@$terms) {
++ Encode::from_to($elem, 'EUC-JP', 'UTF-8');
++ }
+ $cmp_noun_list->{ join ' ', @$terms }++ if defined $terms->[0];
+ @$terms = ();
+ };
+@@ -59,6 +64,8 @@
+ $data = <IN>;
+ close IN;
+ }
++ # convert utf-8 to eucjp.
++ Encode::from_to($data, 'UTF-8', 'EUC-JP');
+
+ # ñ̾\xBB\xEC\xA4\xCEϢ\xB7\xEB\xBD\xE8\xCD\xFD
+ foreach my $morph ((split "\n", $data)) {
+--- TermExtract/MeCab.pm.orig 2005-07-14 18:35:06.000000000 +0900
++++ TermExtract/MeCab.pm 2012-11-17 21:34:49.000000000 +0900
+@@ -3,6 +3,7 @@
+
+ use strict;
+ use Exporter ();
++use Encode 'from_to';
+ use vars qw(@ISA $VERSION @EXPORT);
+
+ @ISA = qw(TermExtract::Calc_Imp Exporter);
+@@ -47,6 +48,10 @@
+ $end =~ /^\s+$/ || $must)
+ { pop @$terms }
+ }
++ # convert eucjp to utf-8.
++ foreach my $elem (@$terms) {
++ Encode::from_to($elem, 'EUC-JP', 'UTF-8');
++ }
+ $cmp_noun_list->{ join ' ', @$terms }++ if defined $terms->[0];
+ @$terms = ();
+ };
+@@ -58,6 +63,8 @@
+ $data = <IN>;
+ close IN;
+ }
++ # convert utf-8 to eucjp.
++ Encode::from_to($data, 'UTF-8', 'EUC-JP');
+
+ # ñ̾\xBB\xEC\xA4\xCEϢ\xB7\xEB\xBD\xE8\xCD\xFD
+ foreach my $morph ((split "\n", $data)) {
+--- SampleScripts/UNIX/ex_JPT.pl.orig 2005-07-14 18:31:14.000000000 +0900
++++ SampleScripts/UNIX/ex_JPT.pl 2012-11-17 20:59:57.000000000 +0900
+@@ -1,19 +1,19 @@
+ #!/usr/local/bin/perl -w
+
+ #
+-# ex_JPTE.pl
++# ex_JPT.pl
+ #
+ # ɸ\xBD\xE0\xBD\xD0\xCEϤ\xCB\xC0\xEC\xCC\xE7\xCDѸ\xEC\xA4Ȥ\xBD\xA4ν\xC5\xCD\xD7\xC5٤\xF2\xCA֤\xB9\xA5ץ\x{D970}\xA5\xE9\xA5\xE0
+-# \xCF\xC2ʸ\xA1֥\xAB\xA5\xBF\xA5\xAB\xA5ʡ\xA6\xB4\xC1\xBB\xFA\xC3\xEA\xBD\xD0\xCA\xFD\xBC\xB0\xA1\xD7EUC\xC8\xC7
++# \xCF\xC2ʸ\xA1֥\xAB\xA5\xBF\xA5\xAB\xA5ʡ\xA6\xB4\xC1\xBB\xFA\xC3\xEA\xBD\xD0\xCA\xFD\xBC\xB0\xA1\xD7UTF-8\xC8\xC7
+ #
+ # version 0.05
+ #
+ #
+
+-use TermExtract::JapanesePlainTextEUC;
++use TermExtract::JapanesePlainText;
+ #use strict;
+-my $data = new TermExtract::JapanesePlainTextEUC;
+-my $InputFile = "JPTE_out.txt"; # \xC6\xFE\xCEϥե\xA1\xA5\xA4\xA5\xEB
++my $data = new TermExtract::JapanesePlainText;
++my $InputFile = "JPT_out.txt"; # \xC6\xFE\xCEϥե\xA1\xA5\xA4\xA5\xEB
+
+ # \xA5ץ\x{D97B}\xA5\xB9\xA4ΰ۾ェλ\xBB\xFE\xBD\xE8\xCD\xFD
+ # (\xA5\xED\xA5å\xAF\xA5ǥ\xA3\xA5쥯\xA5ȥ\xEA\xA4\xF2\xBB\xC8\xCDѤ\xB7\xA4\xBF\xBE\xEC\xB9\xE7\xA4Τߡ\xCB
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macports-changes/attachments/20121117/3a40a76f/attachment-0001.html>
More information about the macports-changes
mailing list