Importing data from HDFS into HBase fails with "key not lexically larger than previous key" - looking for help

软考高级资源共享 2016-03-28 06:34:28

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class LoadDataToHBase {

    public static class LoadDataToHBaseMapper extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, Text> {

        public static int y, m, d, h, n, s, mm;
        Calendar cal = Calendar.getInstance();

        // The map key is a meaningless ImmutableBytesWritable (the line's byte offset);
        // the map value is the original input line, emitted unchanged.
        // After the map phase the framework shuffles and sorts the key-value pairs by key;
        // without that order nothing can be written into the HFile, because an HFile
        // requires that a key written later is never smaller than a key written earlier.
        private ImmutableBytesWritable immutableBytesWritable = new ImmutableBytesWritable();

        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            immutableBytesWritable.set(Bytes.toBytes(key.get()));
            context.write(immutableBytesWritable, value);
        }
    }

    // Each value the reducer receives is one line of the original input file.
    // The reduce key is also meaningless; every value is turned into HFile-format output,
    // i.e. KeyValues consisting of rowkey, family, qualifier, timestamp and value.
    public static class LoadDataToHBaseReducer extends
            Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> {

        public static int y, m, d, h, n, s, mm;
        Calendar cal = Calendar.getInstance();

        protected void reduce(ImmutableBytesWritable key, Iterable<Text> values,
                Context context)
                throws IOException, InterruptedException {
            String value = "";
            while (values.iterator().hasNext()) {
                value = values.iterator().next().toString();
                if (value != null && !"".equals(value)) {
                    List<KeyValue> list = createKeyValue(value);
                    Iterator<KeyValue> it = list.iterator();
                    while (it.hasNext()) {
                        KeyValue kv = it.next();
                        if (kv != null) {
                            context.write(key, kv);
                        }
                    }
                }
            }
        }

        private List<KeyValue> createKeyValue(String str) {
            List<KeyValue> list = new ArrayList<KeyValue>();
            // Note: String.split() takes a regex, so "|" here splits between every
            // character; a literal pipe delimiter would be "\\|".
            String[] values = str.split("|");
            String[] qualifiersName = CONSTANT.qualifiersName;

            for (int i = 1; i < qualifiersName.length; i++) {
                long timeStamp = System.currentTimeMillis();
                String rownum = values[0];
                String family = CONSTANT.familyName;
                String qualifier = qualifiersName[i];
                String value_str = values[i];
                int y = cal.get(Calendar.YEAR);
                int m = cal.get(Calendar.MONTH) + 1;
                int d = cal.get(Calendar.DATE);
                int h = cal.get(Calendar.HOUR);
                int n = cal.get(Calendar.MINUTE);
                int s = cal.get(Calendar.SECOND);
                int mm = cal.get(Calendar.MILLISECOND);
                String rowkey_str = timeStamp + "-" + Integer.toString(y) + Integer.toString(m) + "/" + Integer.toString(d)
                        + Integer.toString(h) + Integer.toString(n) + Integer.toString(s) + "/" + Integer.toString(mm)
                        + rownum + "-" + values[4] + "-" + values[5] + "-" + values[6];
                KeyValue kv = new KeyValue(Bytes.toBytes(rowkey_str),
                        Bytes.toBytes(family), Bytes.toBytes(qualifier),
                        System.currentTimeMillis(), Bytes.toBytes(value_str));
                // Note: with ||, this condition is true for every i; presumably && was
                // intended so that columns 4-6 (already embedded in the rowkey) are skipped.
                if (i != 4 || i != 5 || i != 6) {
                    list.add(kv);
                }
            }
            return list;
        }


}

    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException {
        Configuration conf = HBaseConfiguration.create();
        Job job = new Job(conf, CONSTANT.jobName);
        job.setJarByClass(LoadDataToHBase.class);

        job.setOutputKeyClass(ImmutableBytesWritable.class);
        // Note: Text.class here has to match the value type emitted by the map function.
        job.setOutputValueClass(Text.class);

        job.setMapperClass(LoadDataToHBaseMapper.class);
        job.setReducerClass(LoadDataToHBaseReducer.class);
        // job.setOutputFormatClass(org.apache.hadoop.hbase.mapreduce.HFileOutputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        // job.setNumReduceTasks(4);
        // job.setPartitionerClass(org.apache.hadoop.hbase.mapreduce.SimpleTotalOrderPartitioner.class);

        Configuration fs_conf = new Configuration();
        FileSystem fs = FileSystem.get(fs_conf);
        String str_inPath = CONSTANT.str_inPath;
        String str_outPath = CONSTANT.str_outPath;
        // Delete the output path first if it exists, because the job does not allow
        // the output path to exist beforehand.
        Path outPath = new Path(str_outPath);
        if (fs.exists(outPath))
            fs.delete(outPath, true);

        FileInputFormat.addInputPath(job, new Path(str_inPath));
        FileOutputFormat.setOutputPath(job, new Path(str_outPath));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}



public class CONSTANT {
    public static final String jobName = "LoadDataToHBase";
    public static final String[] qualifiersName = { "", "01_home", "04_name", "05_phone",
            "07_price", "08_room", "09_large", "10_floor", "11_n", "12_site", "14_compay" };
    // public static final String[] qualifiersName = { "", "00_url", "01_home", "02_what",
    //         "03_compay2", "04_name", "05_phone", "06_title",
    //         "07_price", "08_room", "09_large", "10_floor", "11_n", "12_site", "13_compay" };
    public static final String familyName = "info";
    public static final String tableName = "hbase";
    public static final String str_inPath = "/user/hadoop/loadDataToHBase/input";
    public static final String str_outPath = "/user/hadoop/loadDataToHBase/output";
    public static final long timeStamp = System.currentTimeMillis();
}



The error:

16/03/28 18:35:08 INFO mapreduce.Job: map 100% reduce 67%
16/03/28 18:35:08 INFO mapreduce.Job: Task Id : attempt_1458611567937_0066_r_000000_2, Status : FAILED
Error: java.io.IOException: Added a key not lexically larger than previous key=\x00)1459161330442-20163/2863529/403-"-|-"-"-|\x04info00_url\x00\x00\x01S\xBC\xCA\xF3\x0B\x04, lastkey=\x00)1459161330442-20163/2863529/403-"-|-"-|-"\x04info13_compay\x00\x00\x01S\xBC\xCA\xF3\x0A\x04
at org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter.checkKey(AbstractHFileWriter.java:202)
at org.apache.hadoop.hbase.io.hfile.HFileWriterV2.append(HFileWriterV2.java:288)
at org.apache.hadoop.hbase.io.hfile.HFileWriterV2.append(HFileWriterV2.java:253)
at org.apache.hadoop.hbase.regionserver.StoreFile$Writer.append(StoreFile.java:935)
at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2$1.write(HFileOutputFormat2.java:196)
at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2$1.write(HFileOutputFormat2.java:149)
at org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.write(ReduceTask.java:558)
at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
at org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer$Context.write(WrappedReducer.java:105)
at test1.LoadDataToHBase$LoadDataToHBaseReducer.reduce(LoadDataToHBase.java:69)
at test1.LoadDataToHBase$LoadDataToHBaseReducer.reduce(LoadDataToHBase.java:1)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:167)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:162)
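
What the message is saying: the HFile writer behind HFileOutputFormat2 rejects any cell that compares smaller, under KeyValue ordering (row, then family, then qualifier), than the cell written just before it. In the code above the reduce keys are the mappers' file offsets, so the shuffle orders rows by offset, but the rowkeys that actually go into the KeyValues are rebuilt inside createKeyValue() from a fresh System.currentTimeMillis() plus several field values; nothing ties that rowkey order to the offset order, so the first cell of one row (qualifier 00_url in the log) can easily compare smaller than the last cell of the previous row (qualifier 13_compay). A minimal, self-contained sketch of the comparison that fails, with made-up rowkeys purely for illustration:

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class KeyOrderDemo {
    public static void main(String[] args) {
        // Last cell written for the previous row (hypothetical rowkey).
        KeyValue previous = new KeyValue(Bytes.toBytes("1459161330442-rowB"),
                Bytes.toBytes("info"), Bytes.toBytes("13_compay"), Bytes.toBytes("x"));
        // First cell of the next row; its rowkey happens to sort lower.
        KeyValue next = new KeyValue(Bytes.toBytes("1459161330442-rowA"),
                Bytes.toBytes("info"), Bytes.toBytes("00_url"), Bytes.toBytes("y"));
        // KeyValue.COMPARATOR orders by row, then family, then qualifier, then timestamp.
        // A positive result means "next" sorts before "previous" - the condition that
        // makes the HFile writer throw
        // "Added a key not lexically larger than previous key".
        System.out.println(KeyValue.COMPARATOR.compare(previous, next) > 0);   // prints true
    }
}

Prefixing the rowkey with a timestamp does not help, because the timestamp is taken again for every row (and even for every column) inside the reducer, after the shuffle has already fixed the write order.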



Can anyone help? Thanks! How should I change this? I already prepend a timestamp to the rowkey and it still fails. A small amount of data imports fine, but once the data volume grows nothing gets in at all.

Sorry to the five people I pinged.

I just put up 50 points as a bounty, please help.
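
One common way to satisfy the ordering requirement, sketched here under assumptions rather than as a drop-in fix: build the rowkey once in the mapper and emit it as the map output key, so the shuffle sorts rows by the real rowkey, and have the reducer emit each row's cells in KeyValue order, which is what HBase's bundled KeyValueSortReducer does. The class below is illustrative (the name SortedCellReducer is made up); it assumes the mapper already emits rowkey -> raw line pairs and that each line is pipe-delimited with fields aligned to CONSTANT.qualifiersName:

import java.io.IOException;
import java.util.Arrays;
import java.util.TreeSet;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class SortedCellReducer
        extends Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> {

    protected void reduce(ImmutableBytesWritable rowKeyWritable, Iterable<Text> lines, Context context)
            throws IOException, InterruptedException {
        // Copy the reduce key: it is the rowkey that every cell of this row shares,
        // so a later row's cells can never sort ahead of this row's cells.
        byte[] rowKey = Arrays.copyOfRange(rowKeyWritable.get(), rowKeyWritable.getOffset(),
                rowKeyWritable.getOffset() + rowKeyWritable.getLength());
        // Buffer the row's cells ordered the same way the HFile writer checks them
        // (row, family, qualifier, timestamp); cells with identical coordinates
        // collapse, just as in KeyValueSortReducer.
        TreeSet<KeyValue> sorted = new TreeSet<KeyValue>(KeyValue.COMPARATOR);
        for (Text line : lines) {
            String[] fields = line.toString().split("\\|");   // "\\|" = literal pipe delimiter
            for (int i = 1; i < CONSTANT.qualifiersName.length && i < fields.length; i++) {
                sorted.add(new KeyValue(rowKey, Bytes.toBytes(CONSTANT.familyName),
                        Bytes.toBytes(CONSTANT.qualifiersName[i]), Bytes.toBytes(fields[i])));
            }
        }
        // Emit in sorted order so no cell is ever smaller than the previous one.
        for (KeyValue kv : sorted) {
            context.write(rowKeyWritable, kv);
        }
    }
}

In the driver this pairs with job.setMapOutputKeyClass(ImmutableBytesWritable.class) and job.setMapOutputValueClass(Text.class); and when more than one reduce task is used, HFileOutputFormat2.configureIncrementalLoad(job, table) is the usual way to wire in the TotalOrderPartitioner so that each reducer receives a disjoint, sorted key range.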
1 reply
qq_36483394 2017-02-20
OP, did you ever solve this? I've run into the same problem.
