如问题内容所示，求解~~~

xxxlopyou 2011-10-03 02:39:03

如下，有这样一批链接，按照每行一个链接的方式存储在txt文档中。

http://123.com/uiudoa=jf123&dfd=123456
http://123.com/doa=jf123&dfd=123456
http://baolaoda.com/jiioa123fd=123456
http://123.com/jiioa123fd=123456
http://123.com/jiioa123fd=123456
http://46.com/jiidoa=jf56
http://xxrbz.com/jiioa123fd=123456
http://liumangtu.com/jiioa123fd=123456
http://123.com/jiioa123fd=123456

首先，这些链接按照每行一个链接存储在txt文本文档中。
然后，导入txt文件，对这些链接做如下处理：
只要有多条链接的域名相同，就只保留其中任意一条，其余的全部删除。
最后，将处理后的结果保存。（也就是更新文本内容后保存）

PS:我是菜鸟级别，求各位大哥大姐详细点拨~~~Thanks very much!!

...全文

55 3 打赏收藏转发到动态举报

写回复

用AI写文章

3 条回复

切换为时间正序

请发表友善的回复…

发表回复

niuniu20008 2011-10-04

打赏
举报

香香结贴吧，我写的这个，代码量很多啊



package xiang;



import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;



/**
 * De-duplicates a text file of URLs (one URL per line) so that only one URL
 * per domain is kept, then writes the surviving URLs to another text file.
 */
public class Demo {

	/** Charset used for both reading and writing, so text survives the round trip unchanged. */
	private static final Charset FILE_CHARSET = Charset.forName("UTF-8");

	/**
	 * Reads the given file line by line and keeps the first URL seen for each domain.
	 *
	 * <p>Blank lines are skipped. I/O failures are reported with
	 * {@code printStackTrace()} and whatever was collected so far is returned.
	 *
	 * @param fileName path of the text file holding one URL per line
	 * @return map from domain to the first line that used it, in file order;
	 *         never {@code null}
	 */
	public Map<String, String> readFromTxt(String fileName) {
		// LinkedHashMap keeps the output in the same order as the input file.
		Map<String, String> map = new LinkedHashMap<String, String>();

		FileInputStream input = null;
		BufferedReader read = null;
		try {
			input = new FileInputStream(new File(fileName));
			read = new BufferedReader(new InputStreamReader(input, FILE_CHARSET));

			String line;
			while ((line = read.readLine()) != null) {
				if (line.trim().length() == 0) {
					continue; // nothing to extract a domain from
				}
				String realmName = getRealmName(line);
				// Only the first occurrence of a domain is kept.
				if (!map.containsKey(realmName)) {
					map.put(realmName, line);
				}
			}
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			try {
				// Closing the outermost reader also closes the wrapped stream.
				if (read != null) {
					read.close();
				} else if (input != null) {
					input.close();
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
		}

		return map;
	}

	/**
	 * Extracts the domain (host) part of a URL.
	 *
	 * <p>The domain is the text after {@code "://"} (or from the start of the
	 * string when there is no scheme) up to the next {@code '/'} or the end of
	 * the string.
	 *
	 * @param line a URL such as {@code http://123.com/path}; may be {@code null}
	 * @return the domain, e.g. {@code 123.com}; an empty string for
	 *         {@code null} or blank input
	 */
	public String getRealmName(String line) {
		if (line == null) {
			return "";
		}
		String url = line.trim();

		int schemeEnd = url.indexOf("://");
		int begin = schemeEnd >= 0 ? schemeEnd + 3 : 0;

		// A URL without any path ("http://123.com") has no further slash.
		int end = url.indexOf('/', begin);
		if (end < 0) {
			end = url.length();
		}
		return url.substring(begin, end);
	}

	/**
	 * Writes every value of the map to the given file, one per line, as UTF-8.
	 *
	 * <p>An existing file is overwritten; a missing parent directory is created.
	 * I/O failures are reported with {@code printStackTrace()}.
	 *
	 * @param fileName path of the file to write
	 * @param map      domain-to-URL map as produced by {@link #readFromTxt(String)}
	 */
	public void writeToTxt(String fileName, Map<String, String> map) {
		File file = new File(fileName);

		// FileOutputStream creates the file itself but not its directory.
		File parent = file.getAbsoluteFile().getParentFile();
		if (parent != null && !parent.exists()) {
			parent.mkdirs();
		}

		FileOutputStream output = null;
		BufferedWriter buffWriter = null;
		try {
			output = new FileOutputStream(file);
			buffWriter = new BufferedWriter(new OutputStreamWriter(output, FILE_CHARSET));

			for (String line : map.values()) {
				buffWriter.write(line);
				buffWriter.newLine();
			}
			buffWriter.flush();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			try {
				// Closing the outermost writer also closes the wrapped stream.
				if (buffWriter != null) {
					buffWriter.close();
				} else if (output != null) {
					output.close();
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

	/**
	 * Command-line entry point.
	 *
	 * @param args optional: {@code args[0]} is the input file and
	 *             {@code args[1]} the output file; the built-in sample paths
	 *             are used for any argument that is missing
	 */
	public static void main(String[] args) {
		String readFileName = "F:\\MyWorkSpaces\\exerworkspace\\Struts2Web\\src\\xiang\\old.txt";
		String writeFileName = "F:\\MyWorkSpaces\\exerworkspace\\Struts2Web\\src\\xiang\\new.txt";
		if (args != null && args.length > 0) {
			readFileName = args[0];
		}
		if (args != null && args.length > 1) {
			writeFileName = args[1];
		}

		Demo demo = new Demo();
		Map<String, String> map = demo.readFromTxt(readFileName);
		demo.writeToTxt(writeFileName, map);
	}
}

柯本 2011-10-03

打赏
举报

完整的示例，利用HashMap去掉重复的域名：



import java.io.*;

import java.util.regex.*;

import java.util.*;

/**
 * Reads URLs (one per line) from a fixed input file, keeps one URL per
 * domain and writes the survivors to a fixed output file.
 */
public class filetest1 {

	/**
	 * Finds the host of an {@code http://} URL: group 1 is everything between
	 * the scheme and the first following slash or whitespace. Compiled once
	 * because it is applied to every input line.
	 */
	private static final Pattern DOMAIN_PATTERN = Pattern.compile("http://([^/\\s]+)");

	/**
	 * Removes URLs with a duplicated domain.
	 *
	 * <p>Lines that contain no {@code http://} URL are dropped. When several
	 * lines share a domain the last one read is kept, and the output follows
	 * the order in which each domain first appeared.
	 *
	 * @param args not used
	 * @throws IOException if the input cannot be read or the output cannot be written
	 */
	public static void main(String[] args) throws IOException {
		Map<String, String> urlByDomain = new LinkedHashMap<String, String>();

		BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream("d:\\temp\\t.txt")));
		try {
			String data;
			while ((data = br.readLine()) != null) {
				Matcher matcher = DOMAIN_PATTERN.matcher(data);
				if (matcher.find()) {
					// A later line with the same domain replaces the earlier one.
					urlByDomain.put(matcher.group(1), data);
				}
			}
		} finally {
			// Release the input file even when reading fails.
			br.close();
		}

		FileWriter fw = new FileWriter("d:\\temp\\t1.txt");
		try {
			for (String url : urlByDomain.values()) {
				fw.write(url + "\n");
			}
		} finally {
			// Release the output file even when writing fails.
			fw.close();
		}
	}
}

qybao 2011-10-03

打赏
举报

for example

// Copies test.txt to test_new.txt, writing only the first line seen for each domain.
// The pattern is case-insensitive and accepts http or https; group 2 is the host
// with an optional leading "www." left out.
String urlRegex = "(?i)http[s]?[:]//(www[.])?(.*?)/.*";

BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("test.txt")));
PrintStream writer = new PrintStream(new FileOutputStream("test_new.txt"));

Set<String> seenDomains = new HashSet<String>();
for (String line = reader.readLine(); line != null; line = reader.readLine()) {
    // Lines that are not a URL followed by a path are not copied.
    if (!line.matches(urlRegex)) {
        continue;
    }
    String host = line.replaceAll(urlRegex, "$2");
    // add() returns false when the domain was already recorded.
    if (seenDomains.add(host)) {
        writer.println(line);
    }
}

writer.close();
reader.close();