用java实现两个文本文档内容比较

gjlkinglong 2012-01-24 09:07:57

有两个文本文档，我想从一个文本文档（a.txt）里面逐个读取词组，与第二个文本文档的所有词组比较，如果第二个文本文档(dic.txt)包含这个词组，则打印出该词组，这个具体怎么实现，最好能直接贴代码比较直观点，说是用HASHTABLE可以实现，这具体是如何实现的？（一个词组在文本文件里是一行）
另外，如果读取的第一个单词存在于dic.txt中，并且第二个单词与dic.txt比对后也存在，则需要把它和之前读取的第一个单词进行对比：如果多一个字母、少一个字母、有一个字母不同，或者字母相同但顺序相反，则输出对应的结果（多、少或改变了一个字母）。然后继续把a.txt的后续词组与dic.txt对比（并重复上述与之前词组比对的过程），直到某个单词的字母数相差超过正负两个，或内容改变超过两个字母时退出，或者直到a.txt比较结束；最后输出一共比对了几个词组（指a.txt中的词组）。

...全文

1541 6 打赏收藏转发到动态举报

写回复

用AI写文章

6 条回复

切换为时间正序

请发表友善的回复…

发表回复

MiceRice 2012-01-27

打赏
举报

貌似还是我在回复你啊，你后面的要求，其实类似于做“集合减法”运算，不考虑顺序的话，就是一组字符集（单词）减去另一组字符集（单词），还剩下什么。



/**
 * Demonstrates character-level "subtraction" between two words: every
 * character of the second word cancels at most one equal character of the
 * first word, and whatever is left of the first word is the result.
 */
public class TextCompare {

    /**
     * Runs the subtraction on a handful of sample word pairs. Each call
     * prints its own result, so nothing else is printed here.
     */
    public static void main(String[] args) throws Exception {
        String[][] samples = {
            {"nnice", "nmice"},
            {"nmice", "nnice"},
            {"nmice", "nrice"},
            {"rrice", "nnice"},
            {"naice", "mdice"},
        };
        for (String[] pair : samples) {
            minus(pair[0], pair[1]);
        }
    }

    /**
     * Subtracts the characters of {@code minus} from those of {@code base},
     * prints the leftover characters to standard output and returns them.
     *
     * @param base  the word to subtract from
     * @param minus the word whose characters are removed from {@code base}
     * @return the characters of {@code base} that were not cancelled, in
     *         their original relative order
     */
    public static List<Character> minus(String base, String minus) {
        List<Character> remaining = minus(toChars(base), toChars(minus));
        System.out.println(remaining);
        return remaining;
    }

    /**
     * List-based core of the subtraction. Works on a copy, so {@code base}
     * itself is never modified.
     */
    private static List<Character> minus(List<Character> base, List<Character> minus) {
        List<Character> remaining = new ArrayList<Character>(base);
        for (Character toDrop : minus) {
            // The cast selects remove(Object): drop the first equal element
            // (if any) rather than treating the character as an index.
            remaining.remove((Object) toDrop);
        }
        return remaining;
    }

    /** Converts a string into a list holding its characters in order. */
    private static List<Character> toChars(String str) {
        int length = str.length();
        List<Character> chars = new ArrayList<Character>(length);
        for (int i = 0; i < length; i++) {
            chars.add(str.charAt(i));
        }
        return chars;
    }
}

MiceRice 2012-01-25

打赏
举报

楼主已经发了两个帖子了啊。直接求代码，很难有人有空帮你，我先提供个字典类的简易实现吧：



/**
 * Small driver for {@code Dict}: loads the dictionary file and reports
 * whether a couple of sample words are present in it.
 */
public class TextCompare {

    /**
     * Loads "dict.txt" and prints one "word: true/false" line per probe word.
     */
    public static void main(String[] args) throws Exception {
        Dict dict = new Dict("dict.txt");
        String[] probes = {"hello", "nice"};
        for (String word : probes) {
            boolean present = dict.contains(word);
            System.out.println(word + ": " + present);
        }
    }
}



/**
 * Dictionary of words loaded once from a text file and kept in a hash set
 * for fast membership checks. The path/name of the dictionary file is
 * passed to the constructor.
 */
class Dict {

    /** Every token read from the dictionary source. */
    private final HashSet<String> hsDict = new HashSet<String>();

    /**
     * Loads all whitespace-separated tokens from the dictionary.
     *
     * <p>The name is first tried as a file system path; if no such file
     * exists it is looked up as a resource relative to this class. A short
     * timing/size summary is printed to standard output after loading.
     *
     * @param dictFileName path of the dictionary file, or a resource name
     * @throws java.io.FileNotFoundException if the name resolves to neither
     *         an existing file nor a resource
     * @throws Exception if the source cannot be opened or read
     */
    public Dict(String dictFileName) throws Exception {
        Scanner sc = null;
        try {
            // Locate the dictionary: plain file first, then classpath resource.
            File f = new File(dictFileName);
            if (f.exists()) {
                sc = new Scanner(f);
            } else {
                java.io.InputStream in = this.getClass().getResourceAsStream(dictFileName);
                if (in == null) {
                    // getResourceAsStream returns null when nothing matches;
                    // fail with a clear message instead of a bare
                    // NullPointerException from the Scanner constructor.
                    throw new java.io.FileNotFoundException(
                            "Dictionary not found as file or classpath resource: " + dictFileName);
                }
                sc = new Scanner(in);
            }

            // Load every word of the dictionary into the set.
            long timer = System.currentTimeMillis();
            while (sc.hasNext()) {
                hsDict.add(sc.next());
            }
            timer = System.currentTimeMillis() - timer;
            System.out.println("Dict inited, spend: " + timer + "ms, get words: " + hsDict.size());

        } finally {
            if (sc != null) {
                sc.close();
            }
        }
    }

    /**
     * Tells whether the given word was present in the dictionary source.
     *
     * @param word the word to look up (exact, case-sensitive match)
     * @return {@code true} if the word was loaded, {@code false} otherwise
     */
    public boolean contains(String word) {
        return hsDict.contains(word);
    }

}