Importing data from HDFS into HBase fails with "key not lexically larger than previous key" - looking for help

软考高级资源共享 2016-03-28 06:34:28

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class LoadDataToHBase {

    public static class LoadDataToHBaseMapper extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, Text> {

        public static int y, m, d, h, n, s, mm;
        Calendar cal = Calendar.getInstance();

        // The map key is a meaningless ImmutableBytesWritable (the line's byte offset);
        // the map value is the original input line, emitted unchanged.
        // After the map phase the framework shuffles and sorts the key-value pairs by key;
        // without that order nothing can be written into the HFile, because an HFile
        // requires that a key written later is never smaller than a key written earlier.
        private ImmutableBytesWritable immutableBytesWritable = new ImmutableBytesWritable();

        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            immutableBytesWritable.set(Bytes.toBytes(key.get()));
            context.write(immutableBytesWritable, value);
        }
    }

    // Each value the reducer receives is one line of the original input file.
    // The reduce key is also meaningless; every value is turned into HFile-format output,
    // i.e. KeyValues consisting of rowkey, family, qualifier, timestamp and value.
    public static class LoadDataToHBaseReducer extends
            Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> {

        public static int y, m, d, h, n, s, mm;
        Calendar cal = Calendar.getInstance();

        protected void reduce(ImmutableBytesWritable key, Iterable<Text> values,
                Context context)
                throws IOException, InterruptedException {
            String value = "";
            while (values.iterator().hasNext()) {
                value = values.iterator().next().toString();
                if (value != null && !"".equals(value)) {
                    List<KeyValue> list = createKeyValue(value);
                    Iterator<KeyValue> it = list.iterator();
                    while (it.hasNext()) {
                        KeyValue kv = it.next();
                        if (kv != null) {
                            context.write(key, kv);
                        }
                    }
                }
            }
        }

        private List<KeyValue> createKeyValue(String str) {
            List<KeyValue> list = new ArrayList<KeyValue>();
            // Note: String.split() takes a regex, so "|" here splits between every
            // character; a literal pipe delimiter would be "\\|".
            String[] values = str.split("|");
            String[] qualifiersName = CONSTANT.qualifiersName;

            for (int i = 1; i < qualifiersName.length; i++) {
                long timeStamp = System.currentTimeMillis();
                String rownum = values[0];
                String family = CONSTANT.familyName;
                String qualifier = qualifiersName[i];
                String value_str = values[i];
                int y = cal.get(Calendar.YEAR);
                int m = cal.get(Calendar.MONTH) + 1;
                int d = cal.get(Calendar.DATE);
                int h = cal.get(Calendar.HOUR);
                int n = cal.get(Calendar.MINUTE);
                int s = cal.get(Calendar.SECOND);
                int mm = cal.get(Calendar.MILLISECOND);
                String rowkey_str = timeStamp + "-" + Integer.toString(y) + Integer.toString(m) + "/" + Integer.toString(d)
                        + Integer.toString(h) + Integer.toString(n) + Integer.toString(s) + "/" + Integer.toString(mm)
                        + rownum + "-" + values[4] + "-" + values[5] + "-" + values[6];
                KeyValue kv = new KeyValue(Bytes.toBytes(rowkey_str),
                        Bytes.toBytes(family), Bytes.toBytes(qualifier),
                        System.currentTimeMillis(), Bytes.toBytes(value_str));
                // Note: with ||, this condition is true for every i; presumably && was
                // intended so that columns 4-6 (already embedded in the rowkey) are skipped.
                if (i != 4 || i != 5 || i != 6) {
                    list.add(kv);
                }
            }
            return list;
        }


}

    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException {
        Configuration conf = HBaseConfiguration.create();
        Job job = new Job(conf, CONSTANT.jobName);
        job.setJarByClass(LoadDataToHBase.class);

        job.setOutputKeyClass(ImmutableBytesWritable.class);
        // Note: Text.class here has to match the value type emitted by the map function.
        job.setOutputValueClass(Text.class);

        job.setMapperClass(LoadDataToHBaseMapper.class);
        job.setReducerClass(LoadDataToHBaseReducer.class);
        // job.setOutputFormatClass(org.apache.hadoop.hbase.mapreduce.HFileOutputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        // job.setNumReduceTasks(4);
        // job.setPartitionerClass(org.apache.hadoop.hbase.mapreduce.SimpleTotalOrderPartitioner.class);

        Configuration fs_conf = new Configuration();
        FileSystem fs = FileSystem.get(fs_conf);
        String str_inPath = CONSTANT.str_inPath;
        String str_outPath = CONSTANT.str_outPath;
        // Delete the output path first if it exists, because the job does not allow
        // the output path to exist beforehand.
        Path outPath = new Path(str_outPath);
        if (fs.exists(outPath))
            fs.delete(outPath, true);

        FileInputFormat.addInputPath(job, new Path(str_inPath));
        FileOutputFormat.setOutputPath(job, new Path(str_outPath));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}



public class CONSTANT {
    public static final String jobName = "LoadDataToHBase";
    public static final String[] qualifiersName = { "", "01_home", "04_name", "05_phone",
            "07_price", "08_room", "09_large", "10_floor", "11_n", "12_site", "14_compay" };
    // public static final String[] qualifiersName = { "", "00_url", "01_home", "02_what",
    //         "03_compay2", "04_name", "05_phone", "06_title",
    //         "07_price", "08_room", "09_large", "10_floor", "11_n", "12_site", "13_compay" };
    public static final String familyName = "info";
    public static final String tableName = "hbase";
    public static final String str_inPath = "/user/hadoop/loadDataToHBase/input";
    public static final String str_outPath = "/user/hadoop/loadDataToHBase/output";
    public static final long timeStamp = System.currentTimeMillis();
}



The error:

16/03/28 18:35:08 INFO mapreduce.Job: map 100% reduce 67%
16/03/28 18:35:08 INFO mapreduce.Job: Task Id : attempt_1458611567937_0066_r_000000_2, Status : FAILED
Error: java.io.IOException: Added a key not lexically larger than previous key=\x00)1459161330442-20163/2863529/403-"-|-"-"-|\x04info00_url\x00\x00\x01S\xBC\xCA\xF3\x0B\x04, lastkey=\x00)1459161330442-20163/2863529/403-"-|-"-|-"\x04info13_compay\x00\x00\x01S\xBC\xCA\xF3\x0A\x04
at org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter.checkKey(AbstractHFileWriter.java:202)
at org.apache.hadoop.hbase.io.hfile.HFileWriterV2.append(HFileWriterV2.java:288)
at org.apache.hadoop.hbase.io.hfile.HFileWriterV2.append(HFileWriterV2.java:253)
at org.apache.hadoop.hbase.regionserver.StoreFile$Writer.append(StoreFile.java:935)
at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2$1.write(HFileOutputFormat2.java:196)
at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2$1.write(HFileOutputFormat2.java:149)
at org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.write(ReduceTask.java:558)
at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
at org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer$Context.write(WrappedReducer.java:105)
at test1.LoadDataToHBase$LoadDataToHBaseReducer.reduce(LoadDataToHBase.java:69)
at test1.LoadDataToHBase$LoadDataToHBaseReducer.reduce(LoadDataToHBase.java:1)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:167)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:162)
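
What the message is saying: the HFile writer behind HFileOutputFormat2 rejects any cell that compares smaller, under KeyValue ordering (row, then family, then qualifier), than the cell written just before it. In the code above the reduce keys are the mappers' file offsets, so the shuffle orders rows by offset, but the rowkeys that actually go into the KeyValues are rebuilt inside createKeyValue() from a fresh System.currentTimeMillis() plus several field values; nothing ties that rowkey order to the offset order, so the first cell of one row (qualifier 00_url in the log) can easily compare smaller than the last cell of the previous row (qualifier 13_compay). A minimal, self-contained sketch of the comparison that fails, with made-up rowkeys purely for illustration:

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class KeyOrderDemo {
    public static void main(String[] args) {
        // Last cell written for the previous row (hypothetical rowkey).
        KeyValue previous = new KeyValue(Bytes.toBytes("1459161330442-rowB"),
                Bytes.toBytes("info"), Bytes.toBytes("13_compay"), Bytes.toBytes("x"));
        // First cell of the next row; its rowkey happens to sort lower.
        KeyValue next = new KeyValue(Bytes.toBytes("1459161330442-rowA"),
                Bytes.toBytes("info"), Bytes.toBytes("00_url"), Bytes.toBytes("y"));
        // KeyValue.COMPARATOR orders by row, then family, then qualifier, then timestamp.
        // A positive result means "next" sorts before "previous" - the condition that
        // makes the HFile writer throw
        // "Added a key not lexically larger than previous key".
        System.out.println(KeyValue.COMPARATOR.compare(previous, next) > 0);   // prints true
    }
}

Prefixing the rowkey with a timestamp does not help, because the timestamp is taken again for every row (and even for every column) inside the reducer, after the shuffle has already fixed the write order.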



Can anyone help? Thanks! How should I change this? I already prepend a timestamp to the rowkey and it still fails. A small amount of data imports fine, but once the data volume grows nothing gets in at all.

Sorry to the five people I pinged.

I just put up 50 points as a bounty, please help.
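
One common way to satisfy the ordering requirement, sketched here under assumptions rather than as a drop-in fix: build the rowkey once in the mapper and emit it as the map output key, so the shuffle sorts rows by the real rowkey, and have the reducer emit each row's cells in KeyValue order, which is what HBase's bundled KeyValueSortReducer does. The class below is illustrative (the name SortedCellReducer is made up); it assumes the mapper already emits rowkey -> raw line pairs and that each line is pipe-delimited with fields aligned to CONSTANT.qualifiersName:

import java.io.IOException;
import java.util.Arrays;
import java.util.TreeSet;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class SortedCellReducer
        extends Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> {

    protected void reduce(ImmutableBytesWritable rowKeyWritable, Iterable<Text> lines, Context context)
            throws IOException, InterruptedException {
        // Copy the reduce key: it is the rowkey that every cell of this row shares,
        // so a later row's cells can never sort ahead of this row's cells.
        byte[] rowKey = Arrays.copyOfRange(rowKeyWritable.get(), rowKeyWritable.getOffset(),
                rowKeyWritable.getOffset() + rowKeyWritable.getLength());
        // Buffer the row's cells ordered the same way the HFile writer checks them
        // (row, family, qualifier, timestamp); cells with identical coordinates
        // collapse, just as in KeyValueSortReducer.
        TreeSet<KeyValue> sorted = new TreeSet<KeyValue>(KeyValue.COMPARATOR);
        for (Text line : lines) {
            String[] fields = line.toString().split("\\|");   // "\\|" = literal pipe delimiter
            for (int i = 1; i < CONSTANT.qualifiersName.length && i < fields.length; i++) {
                sorted.add(new KeyValue(rowKey, Bytes.toBytes(CONSTANT.familyName),
                        Bytes.toBytes(CONSTANT.qualifiersName[i]), Bytes.toBytes(fields[i])));
            }
        }
        // Emit in sorted order so no cell is ever smaller than the previous one.
        for (KeyValue kv : sorted) {
            context.write(rowKeyWritable, kv);
        }
    }
}

In the driver this pairs with job.setMapOutputKeyClass(ImmutableBytesWritable.class) and job.setMapOutputValueClass(Text.class); and when more than one reduce task is used, HFileOutputFormat2.configureIncrementalLoad(job, table) is the usual way to wire in the TotalOrderPartitioner so that each reducer receives a disjoint, sorted key range.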
1 reply
qq_36483394 2017-02-20
OP, did you ever solve this? I've run into the same problem.
