博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
mapreduce多文件输出的两方法
阅读量:7227 次
发布时间:2019-06-29

本文共 4435 字,大约阅读时间需要 14 分钟。

mapreduce多文件输出的两方法
 
package duogemap;
 
import java.io.IOException;
 
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.lib.MultipleOutputs;
import org.apache.hadoop.util.GenericOptionsParser;
 
public class OldMulOutput {
 
public static class MapClass extends MapReduceBase implements Mapper<LongWritable, Text, NullWritable, Text>{
private MultipleOutputs mos;
private OutputCollector<NullWritable, Text> collector;
 
 
public void Configured(JobConf conf){
mos=new MultipleOutputs(conf);
}
 
public void map(LongWritable key, Text value, OutputCollector<NullWritable, Text> output,Reporter reporter)
throws IOException{
String[] arr=value.toString().split(",", -1);
String chrono=arr[1]+","+arr[2];
String geo=arr[4]+","+arr[5];
collector=mos.getCollector("chrono", reporter);
collector.collect(NullWritable.get(),new Text(chrono));
collector=mos.getCollector("geo", reporter);
collector.collect(NullWritable.get(),new Text(geo));
}
 
public void close() throws IOException{
mos.close();
}
 
 
public static void main(String[] args) throws IOException {
Configuration conf=new Configuration();
String[] remainingArgs=new GenericOptionsParser(conf, args).getRemainingArgs();
 
if (remainingArgs.length !=2) {
System.err.println("Error!");
System.exit(1);
}
 
JobConf job=new JobConf(conf,OldMulOutput.class);
Path in=new Path(remainingArgs[0]);
Path out=new Path(remainingArgs[1]);
FileInputFormat.setInputPaths(job, in);
FileOutputFormat.setOutputPath(job, out);
 
job.setJobName("Multifile");
job.setMapperClass(MapClass.class);
job.setInputFormat(TextInputFormat.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
 
job.setNumReduceTasks(0);
MultipleOutputs.addNamedOutput(job, "chrono", TextOutputFormat.class, NullWritable.class, Text.class);
MultipleOutputs.addNamedOutput(job, "geo", TextOutputFormat.class, NullWritable.class, Text.class);
JobClient.runJob(job);
}
 
}
}
 
 
 
package duogemap;
 
import java.io.IOException;
 
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.util.GenericOptionsParser;
 
import duogemap.OldMulOutput.MapClass;
 
public class MulOutput {
 
public static class MapClass extends Mapper<LongWritable, Text, NullWritable, Text>{
 
private MultipleOutputs mos;
 
@Override
protected void setup(Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
super.setup(context);
mos=new MultipleOutputs(context);
}
@Override
protected void map(LongWritable key, Text value,Context context)
throws IOException, InterruptedException {
mos.write(NullWritable.get(),value,generateFileName(value));
}
private String generateFileName(Text value) {
// TODO Auto-generated method stub
String[] split=value.toString().split(",", -1);
String country=split[4].substring(1, 3);
 
return country+"/";
}
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
super.cleanup(context);
mos.close();
}
 
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf=new Configuration();
Job job=Job.getInstance(conf, "Muloutput");
String[] remainingArgs=new GenericOptionsParser(conf, args).getRemainingArgs();
 
if (remainingArgs.length !=2) {
System.err.println("Error!");
System.exit(1);
}
 
Path in=new Path(remainingArgs[0]);
Path out=new Path(remainingArgs[1]);
FileInputFormat.setInputPaths(job, in);
FileOutputFormat.setOutputPath(job, out);
 
job.setMapperClass(MapClass.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
 
job.setNumReduceTasks(0);
System.exit(job.waitForCompletion(true)?0:1);
}
}
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

转载于:https://www.cnblogs.com/liquan-anran/p/6253087.html

你可能感兴趣的文章
精通Oracle 10g Pl/SQL编程
查看>>
邱怡轩:R中大规模矩阵的SVD与矩阵补全
查看>>
C++ Primer习题集(第5版)
查看>>
centos7 mysql 5.7 yum安装
查看>>
JSOUP简单应用
查看>>
Mysql,SqlServer,Oracle主键自动增长的设置
查看>>
开源 java CMS - FreeCMS2.3会员登录
查看>>
malloc(0)的返回值
查看>>
析构方法、克隆对象
查看>>
Python字符编码详解
查看>>
Android开发 Firebase动态链接打开APP
查看>>
基于 HTML5 Canvas 的 3D 模型贴图问题
查看>>
让技术不要成为“背锅侠”!
查看>>
dubbo源码分析系列——dubbo的SPI机制源码分析
查看>>
表格单元格td设置宽度无效的解决办法
查看>>
防止视频资源被下载
查看>>
都是并发惹的祸
查看>>
eclipse实现JavaWeb项目 增量打包
查看>>
面试题系列一之 程序生命周期
查看>>
设计模式——观察者模式:气象监测应用
查看>>