Hutool的JSONObject.getByPath(String expression)的用法

双喜 2022-05-19 13:33:44

在工作中碰到了这样一个需求:需要从一个XML格式的字符串中解析出自己需要的属性值,代码如下:

/**
 * Hutool path expression that selects the inner {@code read_sentence} element:
 * xml_result -> read_sentence -> rec_paper -> read_sentence.
 */
private static final String path = "[xml_result].[read_sentence].[rec_paper].[read_sentence]";

/**
 * Demo entry point: converts a sample XML document to a Hutool {@code JSONObject},
 * looks up the nested {@code read_sentence} node with {@code getByPath} and prints
 * its {@code total_score} attribute.
 *
 * @param args unused
 * @throws IllegalStateException if {@link #path} does not resolve to a node
 */
public static void main(String[] args) {
        String str = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
                "  <xml_result>\n" +
                "      <read_sentence lan=\"cn\" type=\"study\" version=\"7,0,0,1024\">\n" +
                "          <rec_paper>\n" +
                "              <read_sentence accuracy_score=\"0.000000\" beg_pos=\"0\" content=\"今天天气怎么样。\" emotion_score=\"0.000000\" end_pos=\"236\" except_info=\"0\" fluency_score=\"85.277153\" integrity_score=\"100.000000\" is_rejected=\"false\" phone_score=\"78.571426\" time_len=\"236\" tone_score=\"100.000000\" total_score=\"62.654514\">\n" +
                "                  <sentence beg_pos=\"0\" content=\"今天天气怎么样\" end_pos=\"236\" fluency_score=\"0.000000\" phone_score=\"78.571426\" time_len=\"236\" tone_score=\"100.000000\" total_score=\"71.654205\">\n" +
                "                      <word beg_pos=\"0\" content=\"今\" end_pos=\"102\" symbol=\"jin1\" time_len=\"102\">\n" +
                "                          <syll beg_pos=\"0\" content=\"sil\" dp_message=\"0\" end_pos=\"70\" rec_node_type=\"sil\" time_len=\"70\">\n" +
                "                              <phone beg_pos=\"0\" content=\"sil\" dp_message=\"0\" end_pos=\"70\" rec_node_type=\"sil\" time_len=\"70\"></phone>\n" +
                "                          </syll>\n" +
                "                          <syll beg_pos=\"70\" content=\"fil\" dp_message=\"32\" end_pos=\"83\" rec_node_type=\"fil\" time_len=\"13\">\n" +
                "                              <phone beg_pos=\"70\" content=\"fil\" dp_message=\"32\" end_pos=\"83\" rec_node_type=\"fil\" time_len=\"13\"></phone>\n" +
                "                          </syll>\n" +
                "                          <syll beg_pos=\"83\" content=\"今\" dp_message=\"0\" end_pos=\"102\" rec_node_type=\"paper\" symbol=\"jin1\" time_len=\"19\">\n" +
                "                              <phone beg_pos=\"83\" content=\"j\" dp_message=\"0\" end_pos=\"90\" is_yun=\"0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"7\"></phone>\n" +
                "                              <phone beg_pos=\"90\" content=\"in\" dp_message=\"0\" end_pos=\"102\" is_yun=\"1\" mono_tone=\"TONE1\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"12\"></phone>\n" +
                "                          </syll>\n" +
                "                      </word>\n" +
                "                      <word beg_pos=\"102\" content=\"天\" end_pos=\"126\" symbol=\"tian1\" time_len=\"24\">\n" +
                "                          <syll beg_pos=\"102\" content=\"天\" dp_message=\"0\" end_pos=\"126\" rec_node_type=\"paper\" symbol=\"tian1\" time_len=\"24\">\n" +
                "                              <phone beg_pos=\"102\" content=\"t\" dp_message=\"0\" end_pos=\"108\" is_yun=\"0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"6\"></phone>\n" +
                "                              <phone beg_pos=\"108\" content=\"ian\" dp_message=\"0\" end_pos=\"126\" is_yun=\"1\" mono_tone=\"TONE1\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"18\"></phone>\n" +
                "                          </syll>\n" +
                "                      </word>\n" +
                "                      <word beg_pos=\"126\" content=\"天\" end_pos=\"152\" symbol=\"tian1\" time_len=\"26\">\n" +
                "                          <syll beg_pos=\"126\" content=\"天\" dp_message=\"0\" end_pos=\"152\" rec_node_type=\"paper\" symbol=\"tian1\" time_len=\"26\">\n" +
                "                              <phone beg_pos=\"126\" content=\"t\" dp_message=\"0\" end_pos=\"136\" is_yun=\"0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"10\"></phone>\n" +
                "                              <phone beg_pos=\"136\" content=\"ian\" dp_message=\"0\" end_pos=\"152\" is_yun=\"1\" mono_tone=\"TONE1\" perr_level_msg=\"2\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"16\"></phone>\n" +
                "                          </syll>\n" +
                "                      </word>\n" +
                "                      <word beg_pos=\"152\" content=\"气\" end_pos=\"174\" symbol=\"qi9\" time_len=\"22\">\n" +
                "                          <syll beg_pos=\"152\" content=\"气\" dp_message=\"0\" end_pos=\"174\" rec_node_type=\"paper\" symbol=\"qi0\" time_len=\"22\">\n" +
                "                              <phone beg_pos=\"152\" content=\"q\" dp_message=\"0\" end_pos=\"161\" is_yun=\"0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"9\"></phone>\n" +
                "                              <phone beg_pos=\"161\" content=\"i\" dp_message=\"0\" end_pos=\"174\" is_yun=\"1\" mono_tone=\"TONE0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"13\"></phone>\n" +
                "                          </syll>\n" +
                "                      </word>\n" +
                "                      <word beg_pos=\"174\" content=\"怎\" end_pos=\"186\" symbol=\"zen3\" time_len=\"12\">\n" +
                "                          <syll beg_pos=\"174\" content=\"怎\" dp_message=\"0\" end_pos=\"186\" rec_node_type=\"paper\" symbol=\"zen3\" time_len=\"12\">\n" +
                "                              <phone beg_pos=\"174\" content=\"z\" dp_message=\"0\" end_pos=\"180\" is_yun=\"0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"6\"></phone>\n" +
                "                              <phone beg_pos=\"180\" content=\"en\" dp_message=\"0\" end_pos=\"186\" is_yun=\"1\" mono_tone=\"TONE3\" perr_level_msg=\"3\" perr_msg=\"1\" rec_node_type=\"paper\" time_len=\"6\"></phone>\n" +
                "                          </syll>\n" +
                "                      </word>\n" +
                "                      <word beg_pos=\"186\" content=\"么\" end_pos=\"197\" symbol=\"me5\" time_len=\"11\">\n" +
                "                          <syll beg_pos=\"186\" content=\"么\" dp_message=\"0\" end_pos=\"197\" rec_node_type=\"paper\" symbol=\"me0\" time_len=\"11\">\n" +
                "                              <phone beg_pos=\"186\" content=\"m\" dp_message=\"0\" end_pos=\"189\" is_yun=\"0\" perr_level_msg=\"3\" perr_msg=\"1\" rec_node_type=\"paper\" time_len=\"3\"></phone>\n" +
                "                              <phone beg_pos=\"189\" content=\"e\" dp_message=\"0\" end_pos=\"197\" is_yun=\"1\" mono_tone=\"TONE0\" perr_level_msg=\"3\" perr_msg=\"1\" rec_node_type=\"paper\" time_len=\"8\"></phone>\n" +
                "                          </syll>\n" +
                "                      </word>\n" +
                "                      <word beg_pos=\"197\" content=\"样\" end_pos=\"236\" symbol=\"yang4\" time_len=\"39\">\n" +
                "                          <syll beg_pos=\"197\" content=\"样\" dp_message=\"0\" end_pos=\"217\" rec_node_type=\"paper\" symbol=\"yang4\" time_len=\"20\">\n" +
                "                              <phone beg_pos=\"197\" content=\"_i\" dp_message=\"0\" end_pos=\"206\" is_yun=\"0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"9\"></phone>\n" +
                "                              <phone beg_pos=\"206\" content=\"iang\" dp_message=\"0\" end_pos=\"217\" is_yun=\"1\" mono_tone=\"TONE4\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"11\"></phone>\n" +
                "                          </syll>\n" +
                "                          <syll beg_pos=\"217\" content=\"fil\" dp_message=\"32\" end_pos=\"236\" rec_node_type=\"fil\" time_len=\"19\">\n" +
                "                              <phone beg_pos=\"217\" content=\"fil\" end_pos=\"236\" time_len=\"19\"></phone>\n" +
                "                          </syll>\n" +
                "                      </word>\n" +
                "                  </sentence>\n" +
                "              </read_sentence>\n" +
                "          </rec_paper>\n" +
                "      </read_sentence>\n" +
                "  </xml_result>";
        JSONObject jsonObject = XML.toJSONObject(str);

        // Typed overload instead of an unchecked (JSONObject) cast on the Object result.
        JSONObject readSentence = jsonObject.getByPath(path, JSONObject.class);
        if (readSentence == null) {
            // Fail with a message that names the path rather than an NPE on the next line.
            throw new IllegalStateException("No node found for path: " + path);
        }

        // getBigDecimal converts the stored value instead of assuming it already is a
        // BigDecimal. NOTE(review): which numeric type XML.toJSONObject produces for
        // "62.654514" appears to depend on the Hutool version - confirm for the one in use.
        BigDecimal totalScore = readSentence.getBigDecimal("total_score");
        System.out.println(totalScore);
    }

先将XML字符串解析成hutool的JSONObject对象,但是由于想要的数据在第四层,使用JSONObject.get(Object key)显然不太合适。然后在猜测之下,使用了JSONObject.getByPath(String expression)方法。一开始expression参数写的是 "/xml_result/read_sentence/rec_paper/read_sentence" 和 "\\xml_result\\read_sentence\\rec_paper\\read_sentence",发现返回的都是null;后来去查了hutool官网的api文档,也没有找到详细说明。

 然后在追踪源码的时候发现,表达式是按 '.'、'['、']' 这三个字符来切分的:

/** Characters that terminate a path token inside an expression. */
private static final char[] EXP_CHARS = new char[]{'.', '[', ']'};

/**
 * Splits {@code expression} into its path tokens and stores them, as an
 * unmodifiable list, in {@code patternParts}.
 *
 * <p>Tokens are delimited by {@code '.'}, {@code '['} and {@code ']'}; empty tokens
 * are dropped. A {@code '$'} in the very first position is not part of any token and
 * only sets {@code isStartWith}. Each non-empty token is passed through
 * {@code unWrapIfPossible} before being stored.
 *
 * @param expression the path expression to parse
 * @throws IllegalArgumentException if a {@code ']'} appears without a preceding
 *         {@code '['}, if a {@code '.'} or {@code '['} appears while a {@code '['} is
 *         still open, or if the expression ends with an unclosed {@code '['}
 */
private void init(String expression) {
    final List<String> localPatternParts = new ArrayList<>();
    final int length = expression.length();
    final StrBuilder builder = StrUtil.strBuilder();
    // True while we are between a '[' and its matching ']'. Despite the name, this
    // method does not require the bracket content to be numeric.
    boolean isNumStart = false;

    for (int i = 0; i < length; ++i) {
        final char c = expression.charAt(i);

        if (0 == i && '$' == c) {
            this.isStartWith = true;
            continue;
        }

        if (!ArrayUtil.contains(EXP_CHARS, c)) {
            // Ordinary character: extend the current token.
            builder.append(c);
            continue;
        }

        // Delimiter: validate bracket state first, then flush the current token.
        if (']' == c) {
            if (!isNumStart) {
                throw new IllegalArgumentException(StrUtil.format("Bad expression '{}':{}, we find ']' but no '[' !", expression, i));
            }
            isNumStart = false;
        } else {
            if (isNumStart) {
                throw new IllegalArgumentException(StrUtil.format("Bad expression '{}':{}, we find '[' but no ']' !", expression, i));
            }
            if ('[' == c) {
                isNumStart = true;
            }
        }

        if (builder.length() > 0) {
            localPatternParts.add(unWrapIfPossible(builder));
        }
        builder.reset();
    }

    if (isNumStart) {
        throw new IllegalArgumentException(StrUtil.format("Bad expression '{}':{}, we find '[' but no ']' !", expression, length - 1));
    }

    // Flush the trailing token, which has no delimiter after it.
    if (builder.length() > 0) {
        localPatternParts.add(unWrapIfPossible(builder));
    }

    this.patternParts = Collections.unmodifiableList(localPatternParts);
}

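然后就把路径改成了 "[xml_result].[read_sentence].[rec_paper].[read_sentence]" 的写法,发现就能取到数据了。另外,从上面的源码可以看出 '.' 本身就是分隔符,所以直接写成 "xml_result.read_sentence.rec_paper.read_sentence" 应该同样可以取到数据(未实测)。因为网上关于这个方法的介绍很少,就发个帖子供大家参考。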

...全文
794 回复 打赏 收藏 转发到动态 举报
写回复
用AI写文章
回复
切换为时间正序
请发表友善的回复…
发表回复

51,411

社区成员

发帖
与我相关
我的任务
社区描述
Java相关技术讨论
javaspring bootspring cloud 技术论坛(原bbs)
社区管理员
  • Java相关社区
  • 小虚竹
  • 谙忆
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧