正則表達式數據抽取(regular expression advanced use)

字號:

以下是程序的輸出:
    Aim String:buffer size1=0x1234 buffer size2=1024 buffer size3=9999
    [src pattern]:buffer size1=VAR{HEX_NUM=>N1} buffer size2=VAR{STRING=>N2} buffer size3=VAR{DEC_NUM=>N3}
    [dst pattern]:buffer size1=(0x\d{1,}) buffer size2=(.*?) buffer size3=(\d{1,})
    [var List]
    N1 0x1234
    N2 1024
    N3 9999
    本意是分析 Aim String,獲取 size1、size2、size3 的數據,并且與 3 個變量 N1、N2、N3 關聯。
    為此在 src pattern 中定義了一個模式串,首先分析 src pattern 并得到目標的正則表達式,
    然后匹配數據,在匹配的過程中將數據與關鍵字關聯。
    程序代碼如下:
    class UserStringPattern
    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Pattern;
    public class UserStringPattern {
    public StringBuffer userPattern;
    public StringBuffer destPattern;
    public Pattern pattern;
    public List varList;
    public UserStringPattern(StringBuffer userPattern) {
    super();
    this.userPattern = userPattern;
    this.destPattern = new StringBuffer(userPattern);
    varList = new ArrayList();
    }
    @Override
    public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append("[src pattern]:" + userPattern);
    sb.append("\n");
    sb.append("[dst pattern]:" + destPattern);
    sb.append("\n");
    sb.append("[var List]\n");
    for (int i = 0; i < varList.size(); i++) {
    sb.append("\t" + varList.get(i).toString());
    sb.append("\n");
    }
    return sb.toString();
    }
    public String getValueByName(String name) {
    for (int i = 0; i < varList.size(); i++) {
    VarValue varValue = varList.get(i);
    if (varValue.varName.equals(name)) {
    return varValue.value;
    }
    }
    return "";
    }
    }
    class RegExpUtil
    package com.flylb.util;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import java.util.regex.PatternSyntaxException;
    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;
    class VarValue {
    public String varName;
    public String value;
    public VarValue(String varName, String value) {
    super();
    this.varName = varName;
    this.value = value;
    }
    @Override
    public String toString() {
    return varName + "\t" + value;
    }
    }
    @SuppressWarnings("unchecked")
    public class RegExpUtil {
    private static Log log = LogFactory.getLog(RegExpUtil.class);
    private static Map regExpMap = new HashMap();
    static {
    regExpMap.put("DEC_NUM", "(\\d{1,})");
    regExpMap.put("HEX_NUM", "(0x\\d{1,})");
    regExpMap.put("STRING", "(.*?)");
    }public static boolean creatRegExp(UserStringPattern userStringPattern) {
    List varList = userStringPattern.varList;
    StringBuffer sourcePattern = userStringPattern.userPattern;
    StringBuffer destPattern = userStringPattern.destPattern;
    varList.clear();
    Matcher m = Pattern.compile("VAR\\{(.*?)\\}").matcher(sourcePattern);
    String varToReplace = null, varExp = null, varName = null, varType = null;
    String type2Regexp = null;
    int pos = 0;
    int offset = 0;
    while (m.find()) {
    int start, end;
    start = m.start();
    end = m.end();
    varToReplace = sourcePattern.substring(start, end);
    // System.out.println("varToReplace:" + varToReplace);
    varExp = m.group(1);
    pos = varExp.indexOf("=>");
    if (pos == -1)
    return false;
    varType = varExp.substring(0, pos);
    varName = varExp.substring(pos + 2);
    varList.add(new VarValue(varName, null));
    // log.info(varType);
    // log.info(offset);
    // log.info(start + offset);
    // log.info(end + offset);
    // log.info(destPattern);
    type2Regexp = regExpMap.get(varType);
    if (type2Regexp == null) {
    return false;
    }
    destPattern.replace(start + offset, end + offset, type2Regexp);
    offset += type2Regexp.length() - varToReplace.length();
    }
    try {
    userStringPattern.pattern = Pattern.compile(userStringPattern.destPattern.toString());
    return true;
    } catch (PatternSyntaxException e) {
    log.info("Pattern error:" + userStringPattern.destPattern.toString());
    return false;
    }
    }
    public static void matchPattern(UserStringPattern userStringPattern, String aimString) {
    if (userStringPattern.pattern == null) {
    log.info("pattern is null!");
    return;
    }
    Matcher m = userStringPattern.pattern.matcher(aimString);
    List varList = userStringPattern.varList;
    while (m.find()) {
    for (int j = 1; j <= m.groupCount(); j++) {
    varList.get(j - 1).value = m.group(j);
    }
    }
    }
    public static void test() throws Exception {
    StringBuffer sourcePattern = new StringBuffer(
    "buffer size1=VAR{HEX_NUM=>N1} buffer size2=VAR{STRING=>N2} buffer size3=VAR{DEC_NUM=>N3}");
    UserStringPattern userStringPattern = new UserStringPattern(sourcePattern);
    creatRegExp(userStringPattern);
    String poem = "buffer size1=0x1234 buffer size2=1024 buffer size3=9999";
    log.info("Aim String:" + poem);
    matchPattern(userStringPattern, poem);
    log.info("\n" + userStringPattern);
    userStringPattern = null;
    }
    public static void main(String[] args) {
    try {
    test();
    } catch (Exception e) {
    e.printStackTrace();
    }
    }
    }