import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.clustering.kmeans.Kluster;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

public class Kmeans {

    // Read one 20-dimensional point per line (space-separated doubles) and
    // turn each line into a Mahout Vector.
    public static List<Vector> FileToVector(FileInputStream fs) throws IOException {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs));
        String readLine = null;
        List<Vector> points = new ArrayList<Vector>();
        double[] data = new double[20];
        while ((readLine = br.readLine()) != null) {
            String[] point = readLine.split(" ");
            for (int j = 0; j < 20; j++) {
                data[j] = Double.parseDouble(point[j]);
            }
            Vector vec = new RandomAccessSparseVector(data.length);
            vec.assign(data); // assign() copies the values, so the data[] buffer can be reused
            points.add(vec);
        }
        br.close();
        return points;
    }
    // Write the vectors to a SequenceFile<LongWritable, VectorWritable>
    // that Mahout's k-means driver can consume.
    public static void writePointsToFile(List<Vector> points,
                                         String fileName,
                                         FileSystem fs,
                                         Configuration conf) throws IOException {
        Path path = new Path(fileName);
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
                path, LongWritable.class, VectorWritable.class);
        long recNum = 0;
        VectorWritable vec = new VectorWritable();
        for (Vector point : points) {
            vec.set(point);
            writer.append(new LongWritable(recNum++), vec);
        }
        writer.close();
    }
    public static void main(String args[]) throws Exception {
        FileInputStream fis = new FileInputStream("E:\\Users\\ZZS\\Desktop\\结果\\new-data\\test.txt");
        System.setProperty("HADOOP_USER_NAME", "root");
        int k = 4;
        List<Vector> vectors = FileToVector(fis);

        // Make sure the input directories exist.
        File testData = new File("clustering/testdata");
        if (!testData.exists()) {
            testData.mkdir();
        }
        testData = new File("clustering/testdata/points");
        if (!testData.exists()) {
            testData.mkdir();
        }

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Store the vectors in file1 in a format Mahout can read.
        writePointsToFile(vectors, "clustering/testdata/points/file1", fs, conf);

        // Seed the initial clusters with the first k points, using Euclidean
        // distance to measure the distance between two points.
        Path path = new Path("clustering/testdata/clusters/part-00000");
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, Kluster.class);
        for (int i = 0; i < k; i++) {
            Vector vec = vectors.get(i);
            Kluster cluster = new Kluster(vec, i, new EuclideanDistanceMeasure());
            writer.append(new Text(cluster.getIdentifier()), cluster);
        }
        writer.close();

        KMeansDriver.run(conf,
                new Path("clustering/testdata/points"),   // input points
                new Path("clustering/testdata/clusters"), // initial clusters
                new Path("clustering/output"),            // output path
                0.001,  // convergenceDelta
                10,     // maxIterations
                true,   // runClustering
                0,      // clusterClassificationThreshold
                true);  // runSequential

        // Read the clustered points back and dump them to a local file.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs,
                new Path("clustering/output/" + "clusteredPoints" + "/part-m-0"), conf);
        File f2 = new File("E:\\结果\\hadoop\\kmeans");
        PrintStream ps = new PrintStream(f2.getPath());
        System.setOut(ps);
        IntWritable key = new IntWritable();
        WeightedPropertyVectorWritable value = new WeightedPropertyVectorWritable();
        while (reader.next(key, value)) {
            System.out.println(value.toString() + " belongs to cluster " + key.toString());
        }
        ps.close();
        reader.close();
    }
}
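
FileToVector above expects each line of test.txt to contain at least 20 space-separated numeric values, one point per line. If you do not already have such a file, the small sketch below generates a synthetic one; the output file name, point count, and random values are arbitrary choices for illustration, not part of the original program.

import java.io.PrintWriter;
import java.util.Random;

public class MakeTestData {
    public static void main(String[] args) throws Exception {
        Random rnd = new Random(42);
        try (PrintWriter out = new PrintWriter("test.txt")) {
            for (int i = 0; i < 100; i++) {        // 100 synthetic points
                StringBuilder line = new StringBuilder();
                for (int j = 0; j < 20; j++) {     // 20 dimensions per point
                    if (j > 0) line.append(' ');
                    line.append(rnd.nextDouble());
                }
                out.println(line);
            }
        }
    }
}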