KeyValueTextInputFormat
KeyValueTextInputFormat
Zero零_度 发表于3年前
KeyValueTextInputFormat
  • 发表于 3年前
  • 阅读 47
  • 收藏 0
  • 点赞 0
  • 评论 0

腾讯云 技术升级10大核心产品年终让利>>>   

package com.test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * hello jim
 * hello tim
 *
 * 最后输出
 * hello 1
 * jim 1
 * hello 1
 * tim 1
 */
public class WordCountKeyValue extends Configured implements Tool {
 
 public static class Map extends Mapper<Text, Text, Text, IntWritable> {
  /**
   * key hello
   * value jim
   */
  public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
   context.write(key, new IntWritable(1));
   context.write(value, new IntWritable(1));
  }
 }
 
 public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = this.getConf();
  //指定KeyValueTextInputFormat分割符,默认分割符是\t
  //conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");
  conf.set(KeyValueLineRecordReader.KEY_VALUE_SEPERATOR, "\t");

  
  Job job = new Job(conf);
  job.setJobName(WordCountKeyValue.class.getSimpleName());
  job.setJarByClass(WordCountKeyValue.class);
  
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  
  job.setNumReduceTasks(0);
  job.setMapperClass(Map.class);
  
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.waitForCompletion(true);
  return job.isSuccessful()?0:1;
 }
 
 public static void main(String[] args) throws Exception {
  int exit = ToolRunner.run(new WordCount(), args);
  System.exit(exit);
 }
 
}

共有 人打赏支持
粉丝 65
博文 726
码字总数 241540
×
Zero零_度
如果觉得我的文章对您有用,请随意打赏。您的支持将鼓励我继续创作!
* 金额(元)
¥1 ¥5 ¥10 ¥20 其他金额
打赏人
留言
* 支付类型
微信扫码支付
打赏金额:
已支付成功
打赏金额: