結局ファイル書き出しでmecab&cabocha

 パフォーマンスの低下が著しく,TOKENの設定も難しいので,入力データも出力データも一旦ファイルシステムを経由させることに.
 将来的にはtmpfsを使えばいいかな.

 オプション指定があるので,前回とは違い,mecabおよびcabocha専用.

CabochaTest.java

import java.io.*;
import org.jpn.syo.*;
/**
 *
 * @author SyoTakasaki
 */
public class CabochaTest {

    /**
     * Reads a UTF-8 text file, puts a line break after every sentence
     * terminator, runs the text through MeCab and then feeds the MeCab
     * result to CaboCha.
     *
     * @param args the command line arguments; {@code args[0]} is the path
     *             of the text file to analyse
     * @throws IllegalArgumentException if no input file path is given
     * @throws Exception if reading the file or running either tool fails
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException("usage: CabochaTest <text-file>");
        }

        // INIT
        String file_text = args[0];

        // Read the text file and break it into one sentence per line.
        // String.replace is used because it matches literally; with
        // replaceAll the pattern "." is a regex wildcard and would replace
        // every character of the text.
        String data_text = read_file_as_text(file_text);
        data_text = data_text.replace("。", "。\n");
        data_text = data_text.replace(".", ".\n");

        // Mecab
        String data_mecab = Execute.exec("mecab", data_text);

        // Cabocha (consumes the MeCab result).
        // NOTE(review): the CaboCha result is computed but never printed;
        // the program has always printed the MeCab result instead. Confirm
        // whether data_cabocha was the intended output.
        String data_cabocha = Execute.exec("cabocha", data_mecab);

        System.out.println(data_mecab);
    }

    /**
     * Reads a whole file as UTF-8 text.
     *
     * Line terminators are normalised: every line read, including the last
     * one, is followed by a single {@code "\n"} in the result.
     *
     * @param file_path path of the file to read
     * @return the file content with normalised line endings
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    private static String read_file_as_text(String file_path) throws FileNotFoundException, UnsupportedEncodingException, IOException{
        StringBuilder sb = new StringBuilder();
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file_path), "UTF-8"));
        try {
            String line;
            while((line = br.readLine()) != null){
                sb.append(line).append("\n");
            }
        } finally {
            // Close even when readLine fails so the file handle is not leaked.
            br.close();
        }
        return sb.toString();
    }

}

org/jpn/syo/Execute.java

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package org.jpn.syo;

import java.io.*;
/**
 *
 * @author SyoTakasaki
 */
public class Execute {

    private static final java.util.Random random = new java.util.Random();

    /**
     * Runs MeCab or CaboCha on the given text, exchanging data with the
     * tool through temporary files.
     *
     * The input is written to {@code <random>.in} in the current working
     * directory, the tool is asked to write {@code <random>.out}, and both
     * files are deleted before this method returns, whether it succeeds or
     * fails.
     *
     * @param cmd       either {@code "mecab"} or {@code "cabocha"}
     * @param inputText text handed to the tool; for {@code "cabocha"} this
     *                  is expected to be MeCab output, because the tool is
     *                  started with {@code -I1}
     * @return the content of the tool's output file, each line terminated
     *         by {@code "\n"}
     * @throws IllegalArgumentException if {@code cmd} is not a supported
     *         command or {@code inputText} is {@code null}
     * @throws IOException if a file cannot be read or written, the tool
     *         cannot be started, or it exits with a non-zero status
     * @throws InterruptedException if the thread is interrupted while
     *         waiting for the tool to finish
     */
    public static String exec(String cmd, String inputText) throws IOException, InterruptedException, Exception {
        // Validate before touching the file system so that a bad command
        // does not leave a stray input file behind.
        java.util.List<String> command = new java.util.ArrayList<String>();
        if ("mecab".equals(cmd)) {
            command.add("/usr/bin/mecab");
        } else if ("cabocha".equals(cmd)) {
            command.add("/usr/bin/cabocha");
            // Per the CaboCha command-line help: -f1 selects the lattice
            // output format, -I1 declares the input as POS-tagged text.
            command.add("-f1");
            command.add("-I1");
        } else {
            throw new IllegalArgumentException("UnsupportedCommandException: " + cmd);
        }
        if (inputText == null) {
            throw new IllegalArgumentException("inputText must not be null");
        }

        // Random string used as the base name of the temporary files.
        String base = new File(Long.toHexString(random.nextLong())).getAbsolutePath();
        File inFile = new File(base + ".in");
        File outFile = new File(base + ".out");

        // Arguments are passed as a list rather than by splitting a command
        // string on spaces, so paths containing spaces stay intact.
        command.add(inFile.getPath());
        command.add("--output=" + outFile.getPath());

        try {
            // Write the input to the text file.
            write_file_as_text(inFile.getPath(), inputText);

            // Run the tool.
            ProcessBuilder builder = new ProcessBuilder(command);
            builder.redirectErrorStream(true);
            Process p = builder.start();
            String diagnostics;
            int status;
            try {
                // The tool reads its input from the file, not from stdin.
                p.getOutputStream().close();
                // Drain stdout/stderr so the child can never block on a
                // full pipe; the text is kept for the error message.
                diagnostics = drain(p.getInputStream());
                status = p.waitFor();
            } finally {
                p.destroy();
            }
            if (status != 0) {
                throw new IOException(cmd + " exited with status " + status + ": " + diagnostics.trim());
            }

            // Read the result.
            return read_file_as_text(outFile.getPath());
        } finally {
            // Delete the temporary files, also on failure.
            inFile.delete();
            outFile.delete();
        }
    }

    /** Reads a stream to its end, closes it, and returns the bytes decoded as UTF-8. */
    private static String drain(InputStream stream) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try {
            byte[] chunk = new byte[4096];
            int n;
            while ((n = stream.read(chunk)) != -1) {
                buffer.write(chunk, 0, n);
            }
        } finally {
            stream.close();
        }
        return buffer.toString("UTF-8");
    }

    /**
     * Reads a whole file as UTF-8 text; every line read, including the last
     * one, is followed by a single {@code "\n"} in the result.
     */
    private static String read_file_as_text(String file_path) throws FileNotFoundException, UnsupportedEncodingException, IOException{
        StringBuilder sb = new StringBuilder();
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file_path), "UTF-8"));
        try {
            String line;
            while((line = br.readLine()) != null){
                sb.append(line).append("\n");
            }
        } finally {
            br.close();
        }
        return sb.toString();
    }

    /** Writes {@code text} to {@code file_path} as UTF-8, replacing any existing file. */
    private static void write_file_as_text(String file_path, String text) throws UnsupportedEncodingException, IOException {
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file_path), "UTF-8"));
        try {
            bw.write(text);
            bw.flush();
        } finally {
            bw.close();
        }
    }
}