问题内容所示:求解~~~

xxxlopyou 2011-10-03 02:39:03

如下,有这样的一堆连接,按照每一行一个连接存储在txt文档中。

http://123.com/uiudoa=jf123&dfd=123456
http://123.com/doa=jf123&dfd=123456
http://baolaoda.com/jiioa123fd=123456
http://123.com/jiioa123fd=123456
http://123.com/jiioa123fd=123456
http://46.com/jiidoa=jf56
http://xxrbz.com/jiioa123fd=123456
http://liumangtu.com/jiioa123fd=123456
http://123.com/jiioa123fd=123456

首先,这些链接按照每行一个链接存储在txt文本文档中。
然后,导入txt文件,对这些链接做如下处理:
只要出现了相同的域名,就只保留其中的任意一条,其他的全部删除。
最后,将处理后的结果保存。(也就是更新文本内容后保存)


PS:我是菜鸟级别,求各位大哥大姐详细点拨~~~Thanks very much!!
...全文
51 3 打赏 收藏 转发到动态 举报
写回复
用AI写文章
3 条回复
切换为时间正序
请发表友善的回复…
发表回复
niuniu20008 2011-10-04
  • 打赏
  • 举报
回复
香香结贴吧,我写的这个,代码量很多啊

package xiang;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * De-duplicates a text file of links (one link per line) by domain.
 *
 * <p>{@link #readFromTxt(String)} loads the file and keeps the first link seen for every
 * domain; {@link #writeToTxt(String, Map)} writes the surviving links to another file.
 * Both directions use UTF-8 so the output round-trips the input.
 */
public class Demo {

    /** Character set used for reading and for writing. */
    private static final Charset FILE_CHARSET = Charset.forName("UTF-8");

    /**
     * Reads a file of links and keeps one link per domain.
     *
     * <p>Blank lines are skipped. A non-blank line in which no domain can be located is
     * kept as well, keyed by its full text, so it is only de-duplicated against identical
     * lines. I/O failures are reported on standard error and whatever was read up to that
     * point is returned.
     *
     * @param fileName path of the text file to read, one link per line
     * @return map from domain to the first link found for that domain, in file order;
     *         never {@code null}
     */
    public Map<String, String> readFromTxt(String fileName) {
        // A LinkedHashMap keeps the links in the order they appear in the input file.
        Map<String, String> linksByDomain = new java.util.LinkedHashMap<String, String>();

        FileInputStream input = null;
        BufferedReader reader = null;
        try {
            input = new FileInputStream(fileName);
            reader = new BufferedReader(new InputStreamReader(input, FILE_CHARSET));

            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue; // e.g. a trailing empty line at the end of the file
                }
                String realmName = getRealmName(line);
                String key = realmName.isEmpty() ? line : realmName;
                // A direct lookup replaces the manual scan over all keys; the first link wins.
                if (!linksByDomain.containsKey(key)) {
                    linksByDomain.put(key, line);
                }
            }
        } catch (IOException e) {
            // Best effort: report the problem and return what has been collected so far.
            e.printStackTrace();
        } finally {
            close(reader);
            close(input); // only matters when the reader was never created
        }

        return linksByDomain;
    }

    /**
     * Extracts the domain from a link: the text between the second {@code '/'} and the
     * third {@code '/'} (or the end of the line when there is no third slash).
     *
     * @param line a link such as {@code http://123.com/path}
     * @return the domain, e.g. {@code 123.com}; an empty string when {@code line} is
     *         {@code null} or contains fewer than two slashes
     */
    public String getRealmName(String line) {
        if (line == null) {
            return "";
        }
        int firstSlash = line.indexOf('/');
        if (firstSlash < 0) {
            return "";
        }
        int secondSlash = line.indexOf('/', firstSlash + 1);
        if (secondSlash < 0) {
            return "";
        }
        int thirdSlash = line.indexOf('/', secondSlash + 1);
        // A link without a path ("http://123.com") has no third slash: take the rest.
        return thirdSlash < 0
                ? line.substring(secondSlash + 1)
                : line.substring(secondSlash + 1, thirdSlash);
    }

    /**
     * Writes every link in {@code map} to a text file, one per line, in the map's
     * iteration order. Missing parent directories are created and an existing file is
     * overwritten. I/O failures are reported on standard error.
     *
     * @param fileName path of the file to write
     * @param map      links to write; only the values are written, {@code null} values
     *                 are skipped
     */
    public void writeToTxt(String fileName, Map<String, String> map) {
        File file = new File(fileName);
        // FileOutputStream creates the file itself but not the directory that holds it.
        File parent = file.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }

        FileOutputStream output = null;
        BufferedWriter writer = null;
        try {
            output = new FileOutputStream(file);
            writer = new BufferedWriter(new OutputStreamWriter(output, FILE_CHARSET));
            for (String link : map.values()) {
                if (link == null) {
                    continue;
                }
                writer.write(link);
                writer.newLine();
            }
            writer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(writer);
            close(output); // only matters when the writer was never created
        }
    }

    /** Closes a stream if it was opened, reporting (not propagating) any failure. */
    private static void close(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the file to read and {@code args[1]} the
     *             file to write; the original hard-coded paths are used when omitted
     */
    public static void main(String[] args) {
        String readFileName = args.length > 0
                ? args[0]
                : "F:\\MyWorkSpaces\\exerworkspace\\Struts2Web\\src\\xiang\\old.txt";
        String writeFileName = args.length > 1
                ? args[1]
                : "F:\\MyWorkSpaces\\exerworkspace\\Struts2Web\\src\\xiang\\new.txt";

        Demo demo = new Demo();
        Map<String, String> map = demo.readFromTxt(readFileName);
        demo.writeToTxt(writeFileName, map);
    }
}

柯本 2011-10-03
  • 打赏
  • 举报
回复
完整的示例,利用HashMap去掉重复的域名:

import java.io.*;
import java.util.regex.*;
import java.util.*;
/**
 * Copies a text file of links (one per line) to a new file, keeping only the first link
 * seen for each domain.
 */
public class filetest1 {

    /**
     * Captures the host of an http(s) URL: everything after the scheme up to the next
     * {@code '/'}, {@code '?'}, {@code '#'} or whitespace. Compiled once and reused.
     */
    private static final Pattern DOMAIN_PATTERN = Pattern.compile("https?://([^/?#\\s]+)");

    /**
     * Reads the input file, drops links whose domain was already seen, and writes the
     * remaining links to the output file in their original order. Lines that contain no
     * http(s) URL are not copied.
     *
     * @param args optional: {@code args[0]} is the input file and {@code args[1]} the
     *             output file; {@code d:\temp\t.txt} and {@code d:\temp\t1.txt} are used
     *             when omitted
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        String inputPath = args.length > 0 ? args[0] : "d:\\temp\\t.txt";
        String outputPath = args.length > 1 ? args[1] : "d:\\temp\\t1.txt";

        // Insertion-ordered so the output follows the order of the input file.
        Map<String, String> linkByDomain = new LinkedHashMap<String, String>();

        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(inputPath)));
        try {
            String data;
            while ((data = br.readLine()) != null) {
                Matcher matcher = DOMAIN_PATTERN.matcher(data);
                if (matcher.find()) {
                    String domain = matcher.group(1);
                    if (!linkByDomain.containsKey(domain)) {
                        linkByDomain.put(domain, data);
                    }
                }
            }
        } finally {
            br.close();
        }

        FileWriter fw = new FileWriter(outputPath);
        try {
            for (String link : linkByDomain.values()) {
                fw.write(link + "\n");
            }
        } finally {
            fw.close();
        }
    }

}

qybao 2011-10-03
  • 打赏
  • 举报
回复
for example
// Copies test.txt to test_new.txt, keeping only the first link seen for each domain.
// A line is copied only if it is an http(s) URL with a '/' after the host; the domain
// key is the host with an optional leading "www." removed (capture group 2).
java.util.regex.Pattern urlPattern =
        java.util.regex.Pattern.compile("(?i)http[s]?[:]//(www[.])?(.*?)/.*");
BufferedReader br = null;
PrintStream ps = null;
String buf;
Set<String> domain = new HashSet<String>();
try {
    br = new BufferedReader(new InputStreamReader(new FileInputStream("test.txt")));
    ps = new PrintStream(new FileOutputStream("test_new.txt"));
    while ((buf = br.readLine()) != null) {
        // One compiled pattern and one match per line, instead of matches() + replaceAll().
        java.util.regex.Matcher matcher = urlPattern.matcher(buf);
        if (!matcher.matches()) {
            continue;
        }
        // Set.add returns false when the domain was already recorded.
        if (domain.add(matcher.group(2))) {
            ps.println(buf);
        }
    }
} finally {
    // Close both streams even when reading or writing fails part-way through.
    if (ps != null) {
        ps.close();
    }
    if (br != null) {
        br.close();
    }
}

58,455

社区成员

发帖
与我相关
我的任务
社区描述
Java Eclipse
社区管理员
  • Eclipse
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧