MovieRatingDriver

The configuration is set in the Driver class. The total number of reducers is set to 1 because, if we had multiple reducers, each reducer would produce its own top 20 movies and the final result might not meet the expectation of a single overall top 20. The driver looks like this:


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MovieRatingDriver extends Configured implements Tool {


public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), (Tool) new MovieRatingDriver(), args);
System.exit(res);
}

public int run(String[] args) throws Exception {

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "TopMoviwByRating");
job.setNumReduceTasks(1);

job.setJarByClass(MovieRatingDriver.class);


if (args.length < 2) {
System.out.println("Jar requires 2 paramaters : \""
+ job.getJar()
+ " input_path output_path");
return 1;
}

job.setMapperClass(MovieRatingMapper.class);

job.setReducerClass(MovieRatingReducer.class);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(TextInputFormat.class);

job.setOutputFormatClass(TextOutputFormat.class);

Path filePath = new Path(args[0]);
FileInputFormat.setInputPaths(job, filePath);

Path outputPath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outputPath);

job.waitForCompletion(true);
return 0;
}
}

We have not used a combiner: each mapper flushes out only 20 records at the end, so there is no need for a combiner here. Now let's look into how we can optimize MapReduce applications.