您好,登錄后才能下訂單哦!
本篇文章給大家分享的是有關mr如何使用hcatalog讀寫hive表,小編覺得挺實用的,因此分享給大家學習,希望大家閱讀完這篇文章后可以有所收獲,話不多說,跟著小編一起來看看吧。
public class onTimeMapper extends Mapper {
@Override
protected void map(WritableComparable key, HCatRecord value,
org.apache.hadoop.mapreduce.Mapper.Context context)
throws IOException, InterruptedException {
// Get table schema
HCatSchema schema = HCatBaseInputFormat.getTableSchema(context);
Integer year = new Integer(value.getString("year", schema));
Integer month = new Integer(value.getString("month", schema));
Integer DayofMonth = value.getInteger("dayofmonth", schema);
context.write(new IntPair(year, month), new IntWritable(DayofMonth));
}
}
?
public class onTimeReducer extends Reducer {public void reduce (IntPair key, Iterable value, Context context) throws IOException, InterruptedException{ int count = 0; // records counter for particular year-month for (IntWritable s:value) { count++; } // define output record schema List columns = new ArrayList(3); columns.add(new HCatFieldSchema("year", HCatFieldSchema.Type.INT, "")); columns.add(new HCatFieldSchema("month", HCatFieldSchema.Type.INT, "")); columns.add(new HCatFieldSchema("flightCount", HCatFieldSchema.Type.INT,"")); HCatSchema schema = new HCatSchema(columns); HCatRecord record = new DefaultHCatRecord(3); record.setInteger("year", schema, key.getFirstInt()); record.set("month", schema, key.getSecondInt()); record.set("flightCount", schema, count); context.write(null, record);}}
public class onTimeDriver extends Configured implements Tool{
private static final Log log = LogFactory.getLog( onTimeDriver.class );
public int run( String[] args ) throws Exception{
Configuration conf = new Configuration();
Job job = new Job(conf, "OnTimeCount");
job.setJarByClass(onTimeDriver.class);
job.setMapperClass(onTimeMapper.class);
job.setReducerClass(onTimeReducer.class);
HCatInputFormat.setInput(job, "airline", "ontimeperf");
job.setInputFormatClass(HCatInputFormat.class);
job.setMapOutputKeyClass(IntPair.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DefaultHCatRecord.class);
job.setOutputFormatClass(HCatOutputFormat.class);
HCatOutputFormat.setOutput(job, OutputJobInfo.create("airline", "flight_count", null));
HCatSchema s = HCatOutputFormat.getTableSchema(job);
HCatOutputFormat.setSchema(job, s);
return (job.waitForCompletion(true)? 0:1);
}
public static void main(String[] args) throws Exception{
int exitCode = ToolRunner.run(new onTimeDriver(), args);
System.exit(exitCode);
}
}
?
create table airline.flight_count(Year INT ,Month INT ,flightCount INT)ROW FORMAT DELIMITED FIELDS TERMINATED BY ','STORED AS TEXTFILE;
以上就是mr如何使用hcatalog讀寫hive表,小編相信有部分知識點可能是我們日常工作會見到或用到的。希望你能通過這篇文章學到更多知識。更多詳情敬請關注億速云行業資訊頻道。
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。