Java自動根據文件內容的編碼來讀取,避免亂碼

字號:


    通過cpdetector這個開源的jar包可以自動判斷當前文件的內容編碼,從而在讀取的時候選擇正確的編碼讀取,避免亂碼問題。
    測試結果,提供截圖:
    
    package com.zuidaima.test;
    import info.monitorenter.cpdetector.io.ASCIIDetector;
    import info.monitorenter.cpdetector.io.CodepageDetectorProxy;
    import info.monitorenter.cpdetector.io.JChardetFacade;
    import info.monitorenter.cpdetector.io.ParsingDetector;
    import info.monitorenter.cpdetector.io.UnicodeDetector;
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.InputStreamReader;
    public class Main {
    public static String getContent(String path) throws Exception {
    File file = new File(path);
    CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();
    detector.add(new ParsingDetector(false));
    detector.add(JChardetFacade.getInstance());
    detector.add(ASCIIDetector.getInstance());
    detector.add(UnicodeDetector.getInstance());
    java.nio.charset.Charset charset = null;
    try {
    charset = detector.detectCodepage(file.toURI().toURL());
    } catch (Exception ex) {
    ex.printStackTrace();
    }
    String charsetName = null;
    if (charset != null) {
    charsetName = charset.name();
    } else {
    charsetName = "UTF-8";
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(
    new FileInputStream(file), charsetName));
    String line = null;
    String lines = "";
    while ((line = reader.readLine()) != null) {
    lines += line + " ";
    }
    reader.close();
    return lines;
    }
    public static void main(String[] args) throws Exception {
    System.out.println(getContent("bin/gbk.txt"));
    System.out.println(getContent("bin/utf8.txt"));
    }
    }