結局ファイル書き出しでmecab&cabocha
パフォーマンス低下が著しいし,TOKENの設定が難しいので,入力データも出力データも一旦ファイルシステムを経由させることに.
将来的にはtmpfsを使えばいいかな.
オプション指定があるので,前回とは違い,mecabおよびcabocha専用.
CabochaTest.java
import java.io.*; import org.jpn.syo.*; /** * * @author SyoTakasaki */ public class CabochaTest { /** * @param args the command line arguments */ public static void main(String[] args) throws Exception { // INIT String file_text = args[0]; String data_text = null; String data_mecab = null; String data_cabocha = null; // テキストファイルの読み込み data_text = read_file_as_text(file_text); data_text = data_text.replaceAll("。", "。\n"); data_text = data_text.replaceAll(".", ".\n"); // Mecab data_mecab = Execute.exec("mecab", data_text); // Cabocha data_cabocha = Execute.exec("cabocha", data_mecab); System.out.println(data_mecab); } private static String read_file_as_text(String file_path) throws FileNotFoundException, UnsupportedEncodingException, IOException{ StringBuilder sb = new StringBuilder(); BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file_path), "UTF-8")); String line; while((line = br.readLine()) != null){ sb.append(line).append("\n"); } br.close(); return sb.toString(); } }
org/jpn/syo/Execute.java
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package org.jpn.syo;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Runs the MeCab or CaboCha command-line tools on a piece of text, handing the
 * input and output over through temporary files.
 *
 * @author SyoTakasaki
 */
public class Execute {

    /**
     * Writes {@code inputText} to a temporary file, runs the requested tool on
     * it with {@code --output} pointing at a second temporary file, and returns
     * the content of that output file. Both temporary files are deleted before
     * the method returns, whether it succeeds or fails.
     *
     * @param cmd       {@code "mecab"} or {@code "cabocha"}
     * @param inputText text to feed to the tool (written as UTF-8)
     * @return the tool's output file content, each line terminated by {@code "\n"}
     * @throws IllegalArgumentException if {@code cmd} is neither
     *                                  {@code "mecab"} nor {@code "cabocha"}
     * @throws IOException              if a temporary file cannot be written or
     *                                  read, the process cannot be started, or
     *                                  the tool exits with a non-zero status
     * @throws InterruptedException     if the thread is interrupted while
     *                                  waiting for the tool to finish
     */
    public static String exec(String cmd, String inputText)
            throws IOException, InterruptedException, Exception {
        // Validate the command before touching the file system so that an
        // unsupported (or null) command leaves no stray files behind.
        // Arguments are kept as a list: splitting a command string on spaces
        // breaks as soon as a file path contains a space.
        List<String> command = new ArrayList<String>();
        if ("mecab".equals(cmd)) {
            command.add("/usr/bin/mecab");
        } else if ("cabocha".equals(cmd)) {
            command.add("/usr/bin/cabocha");
            command.add("-f1");
            command.add("-I1");
        } else {
            throw new IllegalArgumentException("UnsupportedCommandException: " + cmd);
        }

        // Unique files in java.io.tmpdir.
        File inFile = File.createTempFile("execute-", ".in");
        File outFile = null;
        try {
            outFile = File.createTempFile("execute-", ".out");
            write_file_as_text(inFile.getPath(), inputText);

            command.add(inFile.getAbsolutePath());
            command.add("--output=" + outFile.getAbsolutePath());

            ProcessBuilder builder = new ProcessBuilder(command);
            // Merge stderr into stdout so a single drain loop empties both.
            builder.redirectErrorStream(true);
            Process p = builder.start();

            String console;
            int status;
            try {
                // The tool reads its input from the file, not from stdin.
                p.getOutputStream().close();
                // Drain console output so the child cannot block on a full pipe.
                console = drain(p.getInputStream());
                status = p.waitFor();
            } finally {
                p.destroy();
            }

            if (status != 0) {
                throw new IOException(cmd + " exited with status " + status
                        + ": " + console.trim());
            }
            return read_file_as_text(outFile.getPath());
        } finally {
            // Always remove the temporary files, including on failure.
            inFile.delete();
            if (outFile != null) {
                outFile.delete();
            }
        }
    }

    /** Reads everything from {@code stream} as UTF-8 text and closes it. */
    private static String drain(InputStream stream) throws IOException {
        StringBuilder sb = new StringBuilder();
        Reader reader = new InputStreamReader(stream, "UTF-8");
        try {
            char[] buf = new char[4096];
            int n;
            while ((n = reader.read(buf)) != -1) {
                sb.append(buf, 0, n);
            }
        } finally {
            reader.close();
        }
        return sb.toString();
    }

    /**
     * Reads a whole file as UTF-8 text.
     *
     * @param file_path path of the file to read
     * @return the file content, with every line terminated by {@code "\n"}
     * @throws FileNotFoundException if the file cannot be opened
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws IOException if reading fails
     */
    private static String read_file_as_text(String file_path)
            throws FileNotFoundException, UnsupportedEncodingException, IOException {
        StringBuilder sb = new StringBuilder();
        BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(file_path), "UTF-8"));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append("\n");
            }
        } finally {
            // Close even when readLine throws, so the file handle is not leaked.
            br.close();
        }
        return sb.toString();
    }

    /**
     * Writes {@code text} to a file as UTF-8, replacing any existing content.
     *
     * @param file_path path of the file to write
     * @param text      content to write
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws IOException if the file cannot be opened or written
     */
    private static void write_file_as_text(String file_path, String text)
            throws UnsupportedEncodingException, IOException {
        BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file_path), "UTF-8"));
        try {
            bw.write(text);
            bw.flush();
        } finally {
            // Close even when write throws, so the file handle is not leaked.
            bw.close();
        }
    }
}