• 解析邮件文本内容; Mime文本解析; MimeStreamParser; multipart解析


    原始文本

    ------=_Part_46705_715015081.1699589700255
    Content-Type: text/html;charset=UTF-8
    Content-Transfer-Encoding: base64
    
    PGh0bWw+CiAgICA8aGVhZD4KICAgICAgICA8bWV0YSBodHRwLW
    VxdWl2PSJDb250ZW50LVR5cGUiIGNvbnRlbnQ9InRleHQvaHRt
    bDsgY2hhcnNldD1VVEYtOCI+CiAgICAgICAgPHRpdGxlPkpTUC
    BQYWdlPC90aXRsZT4KICAgIDwvaGVhZD4KICAgIDxib2R5Pgog
    ICAgICAgIDxoMT5IZWxsbyBXb3JsZCE8L2gxPgogICAgPC9ib2
    R5Pgo8L2h0bWw+
    ------=_Part_46705_715015081.1699589700255--
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    Maven

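     <dependency>
         <groupId>org.apache.james</groupId>
         <artifactId>apache-mime4j-core</artifactId>
         <version>0.8.9</version>
     </dependency>
     <!-- SimpleContentHandler, Header, ContentTypeFieldImpl and DefaultBodyDescriptorBuilder
          used below live in the dom module -->
     <dependency>
         <groupId>org.apache.james</groupId>
         <artifactId>apache-mime4j-dom</artifactId>
         <version>0.8.9</version>
     </dependency>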
    
    • 1
    • 2
    • 3
    • 4
    • 5

    解析方法

    // Sample input: one MIME part declaring text/html, UTF-8 and base64 transfer encoding.
    final String rawPart = "------=_Part_46705_715015081.1699589700255\n" +
            "Content-Type: text/html;charset=UTF-8\n" +
            "Content-Transfer-Encoding: base64\n" +
            "\n" +
            "PGh0bWw+CiAgICA8aGVhZD4KICAgICAgICA8bWV0YSBodHRwLW\n" +
            "VxdWl2PSJDb250ZW50LVR5cGUiIGNvbnRlbnQ9InRleHQvaHRt\n" +
            "bDsgY2hhcnNldD1VVEYtOCI+CiAgICAgICAgPHRpdGxlPkpTUC\n" +
            "BQYWdlPC90aXRsZT4KICAgIDwvaGVhZD4KICAgIDxib2R5Pgog\n" +
            "ICAgICAgIDxoMT5IZWxsbyBXb3JsZCE8L2gxPgogICAgPC9ib2\n" +
            "R5Pgo8L2h0bWw+\n" +
            "------=_Part_46705_715015081.1699589700255--";
    System.out.println(rawPart);

    // The handler receives the header and body callbacks and keeps the resulting text.
    final HtmContentHandler handler = new HtmContentHandler();

    // Default parser configuration, silent decode monitor, default body-descriptor builder.
    final MimeStreamParser parser = new MimeStreamParser(
            MimeConfig.DEFAULT, DecodeMonitor.SILENT, new DefaultBodyDescriptorBuilder());
    parser.setContentHandler(handler);
    // Enable content decoding on the parser; the sample part is base64-encoded.
    parser.setContentDecoding(true);

    final byte[] rawBytes = rawPart.getBytes(UTF_8);
    parser.parse(new ByteArrayInputStream(rawBytes));

    // Print whatever text the handler captured from the body callback.
    System.out.println(handler.getData());
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20

    HtmContentHandler

    
    import org.apache.commons.io.IOUtils;
    import org.apache.james.mime4j.MimeException;
    import org.apache.james.mime4j.dom.Header;
    import org.apache.james.mime4j.field.ContentTypeFieldImpl;
    import org.apache.james.mime4j.message.SimpleContentHandler;
    import org.apache.james.mime4j.stream.BodyDescriptor;
    import org.apache.james.mime4j.stream.Field;
    
    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.charset.Charset;
    import java.nio.charset.StandardCharsets;
    import java.util.Optional;
    
    /**
     * Content handler that captures the text of a parsed MIME body together with the MIME type
     * and charset announced by its {@code Content-Type} header.
     *
     * <p>{@link #body(BodyDescriptor, InputStream)} decodes the stream with the charset recorded by
     * the most recent {@link #headers(Header)} call. Every body callback overwrites the previously
     * stored text, so after parsing, {@link #getData()} holds the last body that was delivered.
     *
     * @author zengrenyuan
     * @date 2023/11/10
     **/
    public class HtmContentHandler extends SimpleContentHandler {
        /** Charset used when the header names none, or names one this JVM cannot resolve. */
        private static final Charset FALLBACK_CHARSET = StandardCharsets.UTF_8;

        private String data;
        private String charset;
        private String contentType;

        /**
         * Reads the whole body stream into a string and stores it.
         *
         * @param bd descriptor of the body (not consulted by this handler)
         * @param is stream with the body content
         * @throws MimeException declared by the overridden callback
         * @throws IOException   if reading the stream fails
         */
        @Override
        public void body(BodyDescriptor bd, InputStream is) throws MimeException, IOException {
            // The charset name originates from the message itself, so it must not be trusted to be
            // valid: resolve it defensively instead of letting an unknown name abort the parse.
            this.data = IOUtils.toString(is, resolveCharset(charset));
            // The text content can be processed here.
        }

        /**
         * Records MIME type and charset from the {@code Content-Type} field of a header block.
         *
         * <p>Both values are cleared first, so a header block without a parsed
         * {@code Content-Type} field does not inherit the values of an earlier one.
         *
         * @param header the header block handed over by the parser
         */
        @Override
        public void headers(Header header) {
            // Header information is evaluated here.
            this.contentType = null;
            this.charset = null;

            Field contentTypeField = header.getField("Content-Type");
            // instanceof is false for null, so a missing field needs no separate check.
            if (contentTypeField instanceof ContentTypeFieldImpl) {
                ContentTypeFieldImpl parsed = (ContentTypeFieldImpl) contentTypeField;
                this.contentType = parsed.getMimeType();
                this.charset = parsed.getParameter("charset");
            }
        }

        /**
         * @return the text of the last body delivered, or {@code null} if none was delivered yet
         */
        public String getData() {
            return data;
        }

        /**
         * @return the {@code charset} parameter of the last header block, exactly as written in
         *         the message, or {@code null} if it had none
         */
        public String getCharset() {
            return charset;
        }

        /**
         * @return the MIME type of the last header block, or {@code null} if it had no parsed
         *         {@code Content-Type} field
         */
        public String getContentType() {
            return contentType;
        }

        /** Maps a charset name from a header to a {@link Charset}, falling back to UTF-8. */
        private static Charset resolveCharset(String name) {
            if (name == null || name.isEmpty()) {
                return FALLBACK_CHARSET;
            }
            try {
                return Charset.forName(name);
            } catch (IllegalArgumentException ignored) {
                // IllegalCharsetNameException and UnsupportedCharsetException both extend
                // IllegalArgumentException; a bad name in the message falls back to the default.
                return FALLBACK_CHARSET;
            }
        }
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51

    参考资料

    https://james.apache.org/mime4j/index.html
    https://github.com/apache/james-mime4j

    如果想解析一段Email数据也可以参考
    https://github.com/ram-sharma-6453/email-mime-parser

  • 相关阅读:
    RocketMQ(20)——整合Spring Boot
    mysql反斜杠多次转义简述
    Vue 开发必须知道的 36 个技巧【近1W字】
    【宋红康 MySQL数据库 】【高级篇】【17】MySQL事务日志:redo、undo
    vscode 关闭tab键选中提示建议
    使用微信免费的内容安全识别接口,UGC场景开发检测违规内容功能
    Mysql事务+redo日志+锁分类+隔离级别+mvcc
    面试题 16.16. 部分排序-双指针法
    虚拟机上安装Linux出现的问题
    上网行为管理软件,能帮助企业解决哪些烦恼?
  • 原文地址:https://blog.csdn.net/zengrenyuan/article/details/134341997