Sunday 29 January 2023

How to install Hadoop 3.3.0 on Ubuntu using a shell script

Installing Hadoop:

Hadoop can be installed in three different modes:

  1. Standalone mode - Single Node Cluster
  2. Pseudo distributed mode - Single Node Cluster
  3. Distributed mode - Multi Node Cluster

Local Mode or Standalone Mode:

  • Standalone mode is Hadoop's default mode.
  • It is used mainly for debugging; HDFS is not used.
  • Both input and output are read from and written to the local file system.
  • No custom configuration is required in mapred-site.xml, hdfs-site.xml or core-site.xml.
  • This is the fastest of the Hadoop modes, because the local file system handles both input and output (a quick smoke test follows this list).
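
A quick way to see standalone mode in action is the bundled wordcount example. This is a minimal sketch, assuming hadoop-3.3.0 is unpacked and its bin directory is on the PATH; the input/output paths are placeholders:

# Standalone smoke test: no daemons, no HDFS, plain local files.
mkdir -p ~/wc-input
echo "hello hadoop hello" > ~/wc-input/words.txt
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar \
    wordcount ~/wc-input ~/wc-output
cat ~/wc-output/part-r-00000   # expect: hadoop 1, hello 2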

Pseudo-distributed Mode

  • Pseudo-distributed mode is also described as a single-node cluster, in which the NameNode and the DataNode run on the same machine.
  • Configuration files such as mapred-site.xml, hdfs-site.xml and core-site.xml are required.
  • All the Hadoop daemons execute on a single node, which is why this is called pseudo-distributed mode.
  • Both Master and Slave roles run on the same machine.
  • A separate Java Virtual Machine (JVM) is spawned for each Hadoop daemon, and the daemons communicate with one another over network sockets. This effectively produces a fully functioning mini-cluster on a single host (see the ssh check after this list).
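
Because the start scripts reach every daemon by ssh-ing into localhost, a quick sanity check for pseudo-distributed mode (assuming the passwordless keys set up by the script below) is:

# should print the hostname with no password prompt
ssh localhost 'hostname'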

Fully-Distributed Mode

  • In this mode multiple nodes are used, which is why it is called the production mode of Hadoop. It is also called a Multi-Node Cluster.
  • Data is distributed across several nodes of the Hadoop cluster.
  • Master and Slave services run on separate nodes (a workers-file sketch follows this list).
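
In a fully distributed cluster the master must know its workers; in Hadoop 3.x these are listed in the etc/hadoop/workers file (named slaves in Hadoop 2.x). A minimal sketch, where worker1 and worker2 are placeholder hostnames:

# on the master node, one worker hostname per line
cat >> $HADOOP_HOME/etc/hadoop/workers <<'EOF'
worker1
worker2
EOF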

Note: This shell script must be run with root privileges.
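
For example, assuming the script below is saved as install-hadoop.sh (a placeholder name):

sudo bash install-hadoop.sh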

The installation shell script:

#!/bin/bash
# Abort unless the script is running with root privileges.
check_root()
{
    uid=$(id -u)
    if [ "$uid" -ne 0 ]; then
        echo "This script must be run as root." >&2
        exit 1
    fi
}
echo -n "Enter new group for hadoop user:"
read hdgroup
echo -n "Enter username for hadoop user:"
read hduser
echo "Adding user to group"
sudo addgroup $hdgroup
sudo adduser -ingroup $hdgroup $hduser
sleep 10
echo "$hdgroup is created and $hduser is assigned to the group"
updates_check() {
    echo "Checking for updates, please wait..."
    sudo apt-get update && sudo apt-get upgrade -y && sudo apt-get install -y ssh
}
java_version_check() {
    echo "Checking for a supported Java version, please wait..."
    # java -version prints to stderr; extract the major version digit.
    java_version=$(java -version 2>&1 >/dev/null | grep version | awk '{print substr($3,4,length($3)-9);}' | tr -d ".")
    if [ "$java_version" = "8" ]; then
        echo "Java 8 is installed and is supported by Hadoop"
        java_home=$(which java)
        # Resolve the real JDK directory from the java binary symlink,
        # e.g. /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java -> .../jre
        path=$(dirname "$(dirname "$(readlink -f "$java_home")")")
        echo "$path"
    else
        java_installation_check
    fi
}
java_installation_check() {
    sudo apt-get remove -y java-common
    echo "Installing OpenJDK 8, please wait..."
    sudo apt install -y openjdk-8-jdk
    java_version_check
}
ssh_keys_creation() {
    sudo -u "$hduser" mkdir -p "/home/$hduser/.ssh"
    # -P "" makes the key passwordless; -f avoids the interactive file-name prompt
    sudo -u "$hduser" ssh-keygen -t rsa -P "" -f "/home/$hduser/.ssh/id_rsa"
    sudo -u "$hduser" sh -c "cat /home/$hduser/.ssh/id_rsa.pub >> /home/$hduser/.ssh/authorized_keys"
    sudo -u "$hduser" chmod 600 "/home/$hduser/.ssh/authorized_keys"
    echo "ssh keys created"
}
hd_install() {
download() {
    # Download and unpack into the hadoop user's home directory.
    wget -P "/home/$hduser" http://archive.apache.org/dist/hadoop/core/hadoop-3.3.0/hadoop-3.3.0.tar.gz
    tar xvfz "/home/$hduser/hadoop-3.3.0.tar.gz" -C "/home/$hduser"
    mv "/home/$hduser/hadoop-3.3.0" "/home/$hduser/hadoop"
}
if [ -f "/home/$hduser/hadoop-3.3.0.tar.gz" ]; then
    echo "Download already exists... using the existing archive."
    rm -rf "/home/$hduser/hadoop"
    echo "Existing directory deleted..."
    tar xvfz "/home/$hduser/hadoop-3.3.0.tar.gz" -C "/home/$hduser"
    mv "/home/$hduser/hadoop-3.3.0" "/home/$hduser/hadoop"
else
    download
fi
chown -R "$hduser:$hdgroup" "/home/$hduser/hadoop"
#tmp folder for further processing
sudo -u "$hduser" mkdir -p "/home/$hduser/hadoop/app/hadoop/tmp"
#namenode and datanode storage directories
sudo -u "$hduser" mkdir -p "/home/$hduser/hadoop/hadoop_store/hdfs/namenode"
sudo -u "$hduser" mkdir -p "/home/$hduser/hadoop/hadoop_store/hdfs/datanode"
#chown must run as root; a non-root user cannot change ownership
chown -R "$hduser:$hdgroup" "/home/$hduser/hadoop/app/hadoop/tmp"
chown -R "$hduser:$hdgroup" "/home/$hduser/hadoop/hadoop_store"
#temporarily open write permission on .bashrc, hadoop-env.sh, core-site.xml,
#mapred-site.xml, hdfs-site.xml and yarn-site.xml for the edits below
sudo -u "$hduser" chmod o+w "/home/$hduser/.bashrc"
sudo -u "$hduser" chmod o+w "/home/$hduser/hadoop/etc/hadoop/hadoop-env.sh"
sudo -u "$hduser" chmod o+w "/home/$hduser/hadoop/etc/hadoop/core-site.xml"
sudo -u "$hduser" chmod o+w "/home/$hduser/hadoop/etc/hadoop/mapred-site.xml"
sudo -u "$hduser" chmod o+w "/home/$hduser/hadoop/etc/hadoop/hdfs-site.xml"
sudo -u "$hduser" chmod o+w "/home/$hduser/hadoop/etc/hadoop/yarn-site.xml"
 
echo "export JAVA_HOME=$path" >> /home/$hduser/hadoop/etc/hadoop/hadoop-env.sh

#bashrc
echo -e '\n\n #Hadoop Variable START \n export HADOOP_HOME=/home/'$hduser'/hadoop \n export HADOOP_INSTALL=$HADOOP_HOME \n export HADOOP_MAPRED_HOME=$HADOOP_HOME \n export HADOOP_COMMON_HOME=$HADOOP_HOME \n export HADOOP_HDFS_HOME=$HADOOP_HOME \n export YARN_HOME=$HADOOP_HOME \n export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native \n export PATH=$PATH:$HADOOP_HOME/sbin/:$HADOOP_HOME/bin \n export HADOOP_OPTS=-Djava.library.path=$HADOOP_HOME/lib/native \n #Hadoop Variable END\n\n' >> /home/$hduser/.bashrc
#the hadoop user picks these variables up on its next login shell
#core-site.xml
sudo sed -i '/<configuration>/a <property>\n\t\t<name>hadoop.tmp.dir</name>\n\t\t<value>/home/'$hduser'/hadoop/app/hadoop/tmp</value>\n</property>\n<property>\n\t\t<name>fs.defaultFS</name>\n\t\t<value>hdfs://localhost:9000</value>\n</property>' /home/$hduser/hadoop/etc/hadoop/core-site.xml
#mapred-site.xml
sudo sed -i '/<configuration>/a <property>\n\t\t <name>mapreduce.framework.name</name>\n\t\t <value>yarn</value>\n</property>' /home/$hduser/hadoop/etc/hadoop/mapred-site.xml
#hdfs-site.xml (namenode/datanode dirs match the hadoop_store paths created above)
sudo sed -i '/<configuration>/a <property>\n\t\t<name>dfs.namenode.name.dir</name>\n\t\t<value>/home/'$hduser'/hadoop/hadoop_store/hdfs/namenode</value>\n</property>\n<property>\n\t\t<name>dfs.datanode.data.dir</name>\n\t\t<value>/home/'$hduser'/hadoop/hadoop_store/hdfs/datanode</value>\n</property>\n<property>\n\t\t<name>dfs.replication</name>\n\t\t<value>1</value>\n</property>' /home/$hduser/hadoop/etc/hadoop/hdfs-site.xml
#yarn-site.xml
sudo sed -i '/<configuration>/a <property>\n\t\t<name>yarn.nodemanager.aux-services</name>\n\t\t<value>mapreduce_shuffle</value>\n</property>\n<property>\n\t\t<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>\n\t\t<value>org.apache.hadoop.mapred.ShuffleHandler</value>\n</property>\n<property>\n\t\t<name>yarn.resourcemanager.hostname</name>\n\t\t<value>127.0.0.1</value>\n</property>\n<property>\n\t\t<name>yarn.acl.enable</name>\n\t\t<value>0</value>\n</property>\n<property>\n\t\t<name>yarn.nodemanager.env-whitelist</name>\n\t\t<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>\n</property>' /home/$hduser/hadoop/etc/hadoop/yarn-site.xml
#revoking the temporary write permissions
sudo -u "$hduser" chmod o-w "/home/$hduser/.bashrc"
sudo -u "$hduser" chmod o-w "/home/$hduser/hadoop/etc/hadoop/hadoop-env.sh"
sudo -u "$hduser" chmod o-w "/home/$hduser/hadoop/etc/hadoop/core-site.xml"
sudo -u "$hduser" chmod o-w "/home/$hduser/hadoop/etc/hadoop/mapred-site.xml"
sudo -u "$hduser" chmod o-w "/home/$hduser/hadoop/etc/hadoop/hdfs-site.xml"
sudo -u "$hduser" chmod o-w "/home/$hduser/hadoop/etc/hadoop/yarn-site.xml"
#list the install directory as a sanity check
sudo ls /home/$hduser/hadoop
#accept localhost's host key once so the start scripts don't prompt later
sudo -u "$hduser" ssh -o StrictHostKeyChecking=no localhost exit
}
check_root
updates_check
java_version_check
ssh_keys_creation
hd_install


After installing Hadoop with the shell script above, go to the hadoop/bin directory and format the NameNode with the following command:

hdfs namenode -format
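
(hadoop namenode -format still works in 3.x but prints a deprecation warning; hdfs is the documented entry point.) To run the format step as the hadoop user created by the script, a one-line sketch is (replace hduser with the username you chose):

sudo -u hduser /home/hduser/hadoop/bin/hdfs namenode -format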

After formatting the NameNode, go to the hadoop/sbin directory and start the daemons with:

 ./start-all.sh

or

./start-dfs.sh
./start-yarn.sh
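
Once the daemons are up, the web UIs offer a quick check; 9870 (NameNode) and 8088 (ResourceManager) are the Hadoop 3.x default ports:

# both should return HTTP 200 on a healthy single-node setup
curl -s -o /dev/null -w "%{http_code}\n" http://localhost:9870
curl -s -o /dev/null -w "%{http_code}\n" http://localhost:8088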

After starting all five daemons (NameNode, DataNode, SecondaryNameNode, ResourceManager and NodeManager) from hadoop/sbin, verify them with the following command:

jps (Java Virtual Machine Process Status)
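
On a healthy pseudo-distributed node, jps lists the five daemons plus itself; the PIDs below are illustrative:

$ jps
2101 NameNode
2243 DataNode
2458 SecondaryNameNode
2671 ResourceManager
2830 NodeManager
3104 Jps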





