package edu.calstatela.hipic.hadoop.util; /** * This is a Hadoop example file updated based on Tom White's MaxTemperatureWithCombiner * Input Folder/File: tempIn * Output File: output5 * Copyright: Jongwook Woo (jwoo5@calstatela.edu) */ // cc MaxTemperatureWithCombiner Application to find the maximum temperature, using a combiner function for efficiency import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.mapred.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; // vv MaxTemperatureWithCombiner public class MyMaxTemperatureWithCombiner extends Configured implements Tool{ /** * Set arguments with no of map/reduce nodes and input/output files * @author Jongwook Woo (jwoo5@calstatela.edu) * @param args * @throws Exception */ public static void main(String[] args) throws Exception { args = new String[6]; args[0] = "-m"; //no of map args[1] = "10"; args[2] = "-r"; //no of reduce args[3] = "10"; args[4] = "tempIn"; //input folder args[5] = "tempOut"; //output folder int res = ToolRunner.run(new Configuration(), new MyMaxTemperatureWithCombiner(), args); //args); System.exit(res); } /** * @author Jongwook Woo (jwoo5@calstatela.edu) * @param args * @return * @throws Exception */ public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), MyMaxTemperatureWithCombiner.class); conf.setJobName("MyMaxTemperatureWithCombiner"); conf.setMapperClass(MaxTemperatureMapper.class); /*[*/conf.setCombinerClass(MaxTemperatureReducer.class)/*]*/; conf.setReducerClass(MaxTemperatureReducer.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); List other_args = new ArrayList(); for(int i=0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i-1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } //conf.setInputPath(new Path(other_args.get(0))); //conf.setOutputPath(new Path(other_args.get(1))); // Updated input/output conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); JobClient.runJob(conf); return 0; } static int printUsage() { System.out.println("multifetch [-m nmaps] [-r nreduces] "); ToolRunner.printGenericCommandUsage(System.out); return -1; } }