使用java將網頁保存為mht格式(2)

字號:

//設置網頁正文
    MimeBodyPart bp = new MimeBodyPart();
    bp.setText(content, strEncoding);
    bp.addHeader("Content-Type", "text/html;charset=" + strEncoding);
    bp.addHeader("Content-Location", strWeb.toString());
    mp.addBodyPart(bp);
    int urlCount = urlScriptList.size();
    for (int i = 0; i < urlCount; i++) {
    bp = new MimeBodyPart();
    ArrayList urlInfo = (ArrayList) urlScriptList.get(i);
    // String url = urlInfo.get(0).toString();
    String absoluteURL = urlInfo.get(1).toString();
    bp
    .addHeader("Content-Location",
    javax.mail.internet.MimeUtility
    .encodeWord(java.net.URLDecoder
    .decode(absoluteURL, strEncoding)));
    DataSource source = new AttachmentDataSource(absoluteURL, "text");
    bp.setDataHandler(new DataHandler(source));
    mp.addBodyPart(bp);
    }
    urlCount = urlImageList.size();
    for (int i = 0; i < urlCount; i++) {
    bp = new MimeBodyPart();
    ArrayList urlInfo = (ArrayList) urlImageList.get(i);
    // String url = urlInfo.get(0).toString();
    String absoluteURL = urlInfo.get(1).toString();
    bp
    .addHeader("Content-Location",
    javax.mail.internet.MimeUtility
    .encodeWord(java.net.URLDecoder
    .decode(absoluteURL, strEncoding)));
    DataSource source = new AttachmentDataSource(absoluteURL, "image");
    bp.setDataHandler(new DataHandler(source));
    mp.addBodyPart(bp);
    }
    msg.setContent(mp);
    // write the mime multi part message to a file
    msg.writeTo(new FileOutputStream(strFileName));
    }
    /**
    *方法說明:mht轉(zhuǎn)html
    *輸入?yún)?shù):strMht mht文件路徑; strHtml html文件路徑
    *返回類型:
    */
    public static void mht2html(String strMht, String strHtml) {
    try {
    //TODO readEmlFile
    InputStream fis = new FileInputStream(strMht);
    Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
    MimeMessage msg = new MimeMessage(mailSession, fis);
    Object content = msg.getContent();
    if (content instanceof Multipart) {
    MimeMultipart mp = (MimeMultipart)content;
    MimeBodyPart bp1 = (MimeBodyPart)mp.getBodyPart(0);
    String strEncodng = getEncoding(bp1);
    String strText = getHtmlText(bp1, strEncodng);
    if (strText == null)
    return;
    File parent = null;
    if (mp.getCount() > 1) {
    parent = new File(new File(strHtml).getAbsolutePath() + ".files");
    parent.mkdirs();
    if (!parent.exists())
    return;
    }
    for (int i = 1; i < mp.getCount(); ++i) {
    MimeBodyPart bp = (MimeBodyPart)mp.getBodyPart(i);
    String strUrl = getResourcesUrl(bp);
    if (strUrl == null)
    continue;
    DataHandler dataHandler = bp.getDataHandler();
    MimePartDataSource source = (MimePartDataSource)dataHandler.getDataSource();
    File resources = new File(parent.getAbsolutePath() + File.separator + getName(strUrl, i));
    if (saveResourcesFile(resources, bp.getInputStream()))
    strText = JHtmlClear.replace(strText, strUrl, resources.getAbsolutePath());
    }
    saveHtml(strText, strHtml);
    }
    } catch (Exception e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    }
    }
    /**
    *方法說明:得到資源文件的name
    *輸入?yún)?shù):strName 資源文件鏈接, ID 資源文件的序號
    *返回類型:資源文件的本地臨時(shí)文件名
    */
    public static String getName(String strName, int ID) {
    char separator = ’/’;
    System.out.println(strName);
    System.out.println(separator);
    if( strName.lastIndexOf(separator) >= 0)
    return format(strName.substring(strName.lastIndexOf(separator) + 1));
    return "temp" + ID;
    }
    /**
    *方法說明:得到網(wǎng)頁編碼
    *輸入?yún)?shù):bp MimeBodyPart類型的網(wǎng)頁內(nèi)容
    *返回類型:MimeBodyPart里的網(wǎng)頁內(nèi)容的編碼
    */
    private static String getEncoding(MimeBodyPart bp) {
    if (bp != null) {
    try {
    Enumeration list = bp.getAllHeaders();
    while (list.hasMoreElements()) {
    javax.mail.Header head = (javax.mail.Header)list.nextElement();
    if (head.getName().compareTo("Content-Type") == 0) {
    String strType = head.getValue();
    int pos = strType.indexOf("charset=");
    if (pos != -1) {
    String strEncoding = strType.substring(pos + 8, strType.length());
    if (strEncoding.toLowerCase().compareTo("gb2312") == 0) {
    strEncoding = "gbk";
    }
    return strEncoding;
    }
    }
    }
    } catch (MessagingException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    }
    }
    return null;
    }
    /**
    *方法說明:得到資源文件url
    *輸入?yún)?shù):bp MimeBodyPart類型的網(wǎng)頁內(nèi)容
    *返回類型:資源文件url
    */  private static String getResourcesUrl(MimeBodyPart bp) {
    if (bp != null) {
    try {
    Enumeration list = bp.getAllHeaders();
    while (list.hasMoreElements()) {
    javax.mail.Header head = (javax.mail.Header)list.nextElement();
    if (head.getName().compareTo("Content-Location") == 0) {
    return head.getValue();
    }
    }
    } catch (MessagingException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    }
    }
    return null;
    }
    /**
    *方法說明:格式化文件名
    *輸入?yún)?shù):strName 文件名
    *返回類型:經(jīng)過處理的符合命名規(guī)則的文件名
    */
    private static String format(String strName) {
    if (strName == null)
    return null;
    strName = strName.replaceAll(" ", " ");
    String strText = "/:*?"<>|^___FCKpd___0quot;;
    for (int i = 0; i < strName.length(); ++i) {
    String ch = String.valueOf(strName.charAt(i));
    if (strText.indexOf(ch) != -1) {
    strName = strName.replace(strName.charAt(i), ’-’);
    }
    }
    return strName;
    }
    /**
    *方法說明:保存資源文件
    *輸入?yún)?shù):resources 要?jiǎng)?chuàng)建的資源文件; inputStream 要輸入文件中的流
    *返回類型:boolean
    */
    private static boolean saveResourcesFile(File resources, InputStream inputStream) {
    if (resources == null || inputStream == null) {
    return false;
    }
    BufferedInputStream in = null;
    FileOutputStream fio = null;
    BufferedOutputStream osw = null;
    try {
    in = new BufferedInputStream(inputStream);
    fio = new FileOutputStream(resources);
    osw = new BufferedOutputStream(new DataOutputStream(fio));
    int b;
    byte[] a = new byte[1024];
    boolean isEmpty = true;
    while ((b = in.read(a)) != -1) {
    isEmpty = false;
    osw.write(a, 0, b);
    osw.flush();
    }
    osw.close();
    fio.close();
    in.close();
    inputStream.close();
    if (isEmpty)
    resources.delete();
    return true;
    } catch (Exception e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    System.out.println("解析mht失敗");
    return false;
    } finally{
    try {
    if (osw != null)
    osw.close();
    if (fio != null)
    fio.close();
    if (in != null)
    in.close();
    if (inputStream != null)
    inputStream.close();
    } catch (Exception e) {
    e.printStackTrace();
    System.out.println("解析mht失敗");
    return false;
    }
    }
    }
    /**
    *方法說明:得到mht文件的標(biāo)題
    *輸入?yún)?shù):mhtFilename mht文件名
    *返回類型:mht文件的標(biāo)題
    */
    public static String getTitle(String mhtFilename) {
    try {
    //TODO readEmlFile
    InputStream fis = new FileInputStream(mhtFilename);
    Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
    MimeMessage msg = new MimeMessage(mailSession, fis);
    Object content = msg.getContent();
    if (content instanceof Multipart) {
    MimeMultipart mp = (MimeMultipart)content;
    MimeBodyPart bp1 = (MimeBodyPart)mp.getBodyPart(0);
    String strEncodng = getEncoding(bp1);
    String strText = getHtmlText(bp1, strEncodng);
    if (strText == null)
    return null;
    strText = strText.toLowerCase();
    int pos1 = strText.indexOf("");<BR>    int pos2 = strText.indexOf("");
    if (pos1 != -1 && pos2!= -1 && pos2 > pos1) {
    return strText.substring(pos1 + 7, pos2).trim();
    }
    }
    return null;
    } catch (Exception e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    return null;
    }
    }
    /**
    *方法說明:得到html文本
    *輸入?yún)?shù):bp MimeBodyPart類型的網(wǎng)頁內(nèi)容; strEncoding 內(nèi)容編碼
    *返回類型:html文本
    */ private static String getHtmlText(MimeBodyPart bp, String strEncoding) {
    InputStream textStream = null;
    BufferedInputStream buff = null;
    BufferedReader br = null;
    Reader r = null;
    try {
    textStream = bp.getInputStream();
    buff = new BufferedInputStream(textStream);
    r = new InputStreamReader(buff, strEncoding);
    br = new BufferedReader(r);
    StringBuffer strHtml = new StringBuffer("");
    String strLine = null;
    while ((strLine = br.readLine()) != null) {
    strHtml.append(strLine + "rn");
    }
    br.close();
    r.close();
    textStream.close();
    return strHtml.toString();
    } catch (Exception e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    } finally{
    try{
    if (br != null)
    br.close();
    if (buff != null)
    buff.close();
    if (textStream != null)
    textStream.close();
    }catch(Exception e){
    System.out.println("解析mht失敗");
    }
    }
    return null;
    }
    /**
    *方法說明:保存html文件
    *輸入?yún)?shù):strText html內(nèi)容; strHtml html文件名
    *返回類型:
    */
    private static void saveHtml(String strText, String strHtml) {
    try {
    FileWriter fw = new FileWriter(strHtml);
    fw.write(strText);
    fw.close();
    } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    System.out.println("解析mht失敗");
    }
    }
    private InternetAddress[] getInetAddresses(String emails) throws Exception {
    ArrayList list = new ArrayList();
    StringTokenizer tok = new StringTokenizer(emails, ",");
    while (tok.hasMoreTokens()) {
    list.add(tok.nextToken());
    }
    int count = list.size();
    InternetAddress[] addresses = new InternetAddress[count];
    for (int i = 0; i < count; i++) {
    addresses[i] = new InternetAddress(list.get(i).toString());
    }
    return addresses;
    }
    class AttachmentDataSource implements DataSource {
    private MimetypesFileTypeMap map = new MimetypesFileTypeMap();
    private String strUrl;
    private String strType;
    private byte[] dataSize = null;
    /**
    * This is some content type maps.
    */
    private Map normalMap = new HashMap();
    {
    // Initiate normal mime type map
    // Images
    normalMap.put("image", "image/jpeg");
    normalMap.put("text", "text/plain");
    }
    public AttachmentDataSource(String strUrl, String strType) {
    this.strType = strType;
    this.strUrl = strUrl;
    strUrl = strUrl.trim();
    strUrl = strUrl.replaceAll(" ", "%20");
    dataSize = JQuery.downBinaryFile(strUrl, null);
    }
    /**
    * Returns the content type.
    */
    public String getContentType() {
    return getMimeType(getName());
    }
    public String getName() {
    char separator = File.separatorChar;
    if( strUrl.lastIndexOf(separator) >= 0 )
    return strUrl.substring(strUrl.lastIndexOf(separator) + 1);
    return strUrl;
    }
    private String getMimeType(String fileName) {
    String type = (String)normalMap.get(strType);
    if (type == null) {
    try {
    type = map.getContentType(fileName);
    } catch (Exception e) {
    // TODO: handle exception
    }
    System.out.println(type);
    // Fix the null exception
    if (type == null) {
    type = "application/octet-stream";
    }
    }
    return type;
    }
    public InputStream getInputStream() throws IOException {
    // TODO Auto-generated method stub
    if (dataSize == null)
    dataSize = new byte[0];
    return new ByteArrayInputStream(dataSize);
    }
    public OutputStream getOutputStream() throws IOException {
    // TODO Auto-generated method stub
    return new java.io.ByteArrayOutputStream();
    }
    }
    }