Program on MapReduce Weather Dataset :
Step 1: Create a folder named ncdc and grant all users full permission so it can be accessed from anywhere:
chmod -R 777 ncdc
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MaxtempDriver
Step 2: Collect the weather dataset from the NCDC domain, save it as tempinput.txt, and place it in the ncdc folder:
0067011990999991950051507004+68750+023550FM-12+038299999V0203301N00671220001CN9999999N9+00001+99999999999
0043011990999991950051512004+68750+023550FM-12+038299999V0203201N00671220001CN9999999N9+00221+99999999999
0043011990999991950051518004+68750+023550FM-12+038299999V0203201N00261220001CN9999999N9-00111+99999999999
0043012650999991949032412004+62300+010750FM-12+048599999V0202701N00461220001CN0500001N9+01111+99999999999
0043012650999991949032418004+62300+010750FM-12+048599999V0202701N00461220001CN0500001N9+00781+99999999999
Step 3: Create 3 files, namely:
1. MaxtempDriver.java
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MaxtempDriver
{
public static void main(String[] args) throws Exception
public static void main(String[] args) throws Exception
{
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "MaxtempDriver");
job.setJarByClass(MaxtempDriver.class);
// TODO: specify a mapper
job.setMapperClass(MaxtempMapper.class);
// TODO: specify a reducer
job.setReducerClass(MaxtempReducer.class);
// TODO: specify output types
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// TODO: specify input and output DIRECTORIES (not files)
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if (!job.waitForCompletion(true))
return;
}
}
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "MaxtempDriver");
job.setJarByClass(MaxtempDriver.class);
// TODO: specify a mapper
job.setMapperClass(MaxtempMapper.class);
// TODO: specify a reducer
job.setReducerClass(MaxtempReducer.class);
// TODO: specify output types
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// TODO: specify input and output DIRECTORIES (not files)
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if (!job.waitForCompletion(true))
return;
}
}
2.MaxtempMapper.java
//package maxtemp;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Mapper;
public class MaxtempMapper
extends Mapper<LongWritable, Text, Text, IntWritable >
{
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
{
String line=value.toString();
String year=line.substring(15,19);
int airtemp;
if(line.charAt(87)== '+')
{
airtemp=Integer.parseInt(line.substring(88,92));
}
else
airtemp=Integer.parseInt(line.substring(87,92));
String q=line.substring(92,93);
if(airtemp!=9999&&q.matches("[01459]"))
{
context.write(new Text(year),new IntWritable(airtemp));
}
}
}
String line=value.toString();
String year=line.substring(15,19);
int airtemp;
if(line.charAt(87)== '+')
{
airtemp=Integer.parseInt(line.substring(88,92));
}
else
airtemp=Integer.parseInt(line.substring(87,92));
String q=line.substring(92,93);
if(airtemp!=9999&&q.matches("[01459]"))
{
context.write(new Text(year),new IntWritable(airtemp));
}
}
}
3.MaxtempReducer.java
//package maxtemp;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;
public class MaxtempReducer
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;
public class MaxtempReducer
extends Reducer<Text, IntWritable, Text, IntWritable>
{
public void reduce(Text key, Iterable<IntWritable> values, Context context)throws IOException, InterruptedException
{
int maxvalue=Integer.MIN_VALUE;
for (IntWritable value : values)
int maxvalue=Integer.MIN_VALUE;
for (IntWritable value : values)
{
maxvalue=Math.max(maxvalue, value.get());
}
context.write(key, new IntWritable(maxvalue));
}
}
maxvalue=Math.max(maxvalue, value.get());
}
context.write(key, new IntWritable(maxvalue));
}
}
Step 4: generate class files for each java file using the following commands
hduser@ubuntu:~/ncdc$ export CLASSPATH=`hadoop classpath`
hduser@ubuntu:~/ncdc$ echo $CLASSPATH
hduser@ubuntu:~/ncdc$ javac -d . MaxtempMapper.java MaxtempReducer.java MaxtempDriver.java
Step 5:Create a jar by using the following command
hduser@ubuntu:~/ncdc$ jar -cvf max.jar -C /home/hduser/ncdc .
Step 6: Create a folder rkmaxtemp in HDFS and copy the weather dataset file tempinput.txt into it using the following commands:
hduser@ubuntu:~/ncdc$ hadoop fs -mkdir /rkmaxtemp
hduser@ubuntu:~/ncdc$ hadoop fs -put tempinput.txt /rkmaxtemp
hduser@ubuntu:~/ncdc$ hadoop fs -lsr /rkmaxtemp
hduser@ubuntu:~/ncdc$ hadoop fs -cat /rkmaxtemp/tempinput.txt
Step 7: Now run the job with the hadoop jar command, passing the MaxtempDriver class and the input/output paths as shown:
hduser@ubuntu:~/ncdc$ hadoop jar max.jar MaxtempDriver /rkmaxtemp/tempinput.txt /rkmaxtemp/out
Step 8:Now we can check the maximum temperature of the given dataset in a folder rkmaxtemp/out under DFS
hduser@ubuntu:~/ncdc$ hadoop fs -cat /rkmaxtemp/out/part-r-00000
No comments:
Post a Comment