diff --git a/spot-ingest/README.md b/spot-ingest/README.md index acfb3828..9a489c5f 100644 --- a/spot-ingest/README.md +++ b/spot-ingest/README.md @@ -18,8 +18,13 @@ Ingest data is captured or transferred into the Hadoop cluster, where they are t * Ingest user with sudo privileges (i.e. spot). This user will execute all the processes in the Ingest Framework also this user needs to have access to hdfs solution path (i.e. /user/spot/). ### Install -1. Install Python dependencies `pip install -r requirements.txt` +run `sudo ./install.sh` + +* If your environment requires proxies we recommend using `sudo -E ./install.sh` +* dependencies installed in /opt/spot/bin/ +* Installs [tshark](https://www.wireshark.org/docs/man-pages/tshark.html), [spot-nfdump](https://github.com/Open-Network-Insight/spot-nfdump), [Python PIP](https://pip.pypa.io/en/stable/) + ### Configure Kafka **Adding Kafka Service:** diff --git a/spot-ingest/install.sh b/spot-ingest/install.sh new file mode 100755 index 00000000..96c01d29 --- /dev/null +++ b/spot-ingest/install.sh @@ -0,0 +1,163 @@ +#!/bin/bash + +nfdump_vers=1.1 +wshark_vers=2.2.3 +local_path=`pwd` +source_path=/tmp/ingest_src +install_path=/opt/spot/ +dependencies=(tar wget screen python make gcc m4 automake autoconf flex byacc) +missing_dep=() +host_os="" +wget_cmd="wget -nc" +untar_cmd="tar -xvf" +mk_opt="-j `nproc`" + +# functions +log_cmd () { + printf "\n****SPOT.INGEST.install.sh****\n" + date +"%y-%m-%d %H:%M:%S" + printf "$1\n\n" +} + +check_os () { + # detect distribution + # to add other distributions simply create a test case with installation commands + if [ -f /etc/redhat-release ]; then + install_cmd="yum -y install" + log_cmd "installation command: $install_cmd" + host_os="rhel" + elif [ -f /etc/debian_version ]; then + install_cmd="apt-get install -yq" + log_cmd "installation command: $install_cmd" + host_os="debian" + apt-get update + fi +} + +cleanup () { + log_cmd "executing cleanup" + rm -rf ${source_path} +} + 
+check_root () {
+    # checking for root as many of these functions interact with system owned directories
+    if [[ "$EUID" -ne 0 ]]; then
+        log_cmd "Non root user detected, Please run as root or with sudo"
+        exit 1
+    fi
+}
+
+check_bin () {
+    # check_bin can be used to verify if a certain binary is already installed
+    for item in "$@"; do
+        if type ${item} >/dev/null 2>&1; then
+            log_cmd "${item} found"
+        else
+            missing_dep+=(${item})
+        fi
+    done
+}
+
+install_pkg () {
+    # if no parameters this will simply install any $missing_deps
+    # if any parameters provided they will be added to $missing_dep
+
+    if [[ "$@" ]]; then
+        for item in "$@"; do
+            missing_dep+=(${item})
+        done
+    fi
+
+    if [[ "${missing_dep[@]}" ]]; then
+        log_cmd "installing ${missing_dep[@]}"
+        ${install_cmd} ${missing_dep[@]}
+        unset missing_dep[*]
+    fi
+}
+
+check_tshark () {
+    # check dependencies only, installs in custom location
+    log_cmd "installing dependencies for tshark installation"
+    if [ "${host_os}" == "debian" ]; then
+        check_bin make bzip2 pkg-config libsmi flex bison byacc
+        install_pkg libpcap-dev heimdal-dev libc-ares-dev
+    elif [ "${host_os}" == "rhel" ]; then
+        check_bin make bzip2 gcc bison
+        install_pkg glib2-devel flex-devel libsmi-devel libpcap-devel
+    fi
+}
+
+install_tshark () {
+    if type tshark >/dev/null 2>&1; then
+        log_cmd "tshark found"
+    else
+        log_cmd "tshark missing"
+        check_tshark
+        ${wget_cmd} https://1.na.dl.wireshark.org/src/wireshark-${wshark_vers}.tar.bz2 -P ${source_path}/
+        ${untar_cmd} ${source_path}/wireshark-${wshark_vers}.tar.bz2 -C ${source_path}/
+        cd ${source_path}/wireshark-${wshark_vers}
+        log_cmd "compiling tshark"
+        ./configure --prefix=${install_path} --enable-wireshark=no
+        make ${mk_opt}
+        make install
+        cd ..
+    fi
+    log_cmd "tshark build complete"
+    tshark -v
+}
+
+install_nfdump () {
+    if type nfdump >/dev/null 2>&1; then
+        log_cmd "nfdump found"
+    else
+        log_cmd "installing spot-nfdump"
+        ${wget_cmd} https://github.com/Open-Network-Insight/spot-nfdump/archive/${nfdump_vers}.tar.gz -P ${source_path}/
+        ${untar_cmd} ${source_path}/${nfdump_vers}.tar.gz -C ${source_path}/
+        cd ${source_path}/spot-nfdump-*/
+        source ./install_nfdump.sh ${install_path}
+        cd ${local_path}
+    fi
+}
+
+install_pip () {
+    if type pip >/dev/null 2>&1; then
+        log_cmd "pip found"
+    else
+        log_cmd "missing pip"
+        ${wget_cmd} https://bootstrap.pypa.io/get-pip.py -P ${source_path}/
+        python ${source_path}/get-pip.py
+        log_cmd "pip installed"
+    fi
+}
+
+# end functions
+
+check_root
+check_os
+
+if [ ! -d ${source_path} ]; then
+    mkdir ${source_path}
+fi
+
+if [ ! -d ${install_path} ]; then
+    log_cmd "${install_path} not created, Please run spot-setup/install.sh first"
+    exit 1
+fi
+
+# check basic dependencies
+check_bin ${dependencies[@]}
+install_pkg
+
+# install dissectors
+install_tshark
+install_nfdump
+
+# python dependencies
+install_pip
+
+if [ -f ${local_path}/requirements.txt ]; then
+    pip install -r requirements.txt
+fi
+
+log_cmd "spot-ingest dependencies installed"
+
+cleanup
diff --git a/spot-ml/README.md b/spot-ml/README.md
index 3200c064..e2af8e02 100644
--- a/spot-ml/README.md
+++ b/spot-ml/README.md
@@ -11,6 +11,15 @@ netflow and DNS records, and spot-ml will try to load data to the operational an
 
 The remaining instructions in this README file treat spot-ml in a stand-alone fashion that might be helpful for customizing and troubleshooting the component.
 
+### Install + +run `sudo ./install.sh` + +* If your environment requires proxies we recommend using `sudo -E ./install.sh` +* Installs [SBT](http://www.scala-sbt.org) +* Compiles the spot-ml jar and copies to /opt/spot/jar/ +* Copies ml_ops.sh to /opt/spot/bin/ + ## Prepare data for input Load data for consumption by spot-ml by running [spot-ingest]. diff --git a/spot-ml/install.sh b/spot-ml/install.sh new file mode 100755 index 00000000..77fbd32e --- /dev/null +++ b/spot-ml/install.sh @@ -0,0 +1,112 @@ +#!/bin/bash + +local_path=`pwd` +install_path=/opt/spot +dependencies=(curl) +missing_dep=() +wget_cmd="wget -nc" +host_os="" + + +# functions + +log_cmd () { + + printf "\n****SPOT.ML.install.sh****\n" + date +"%y-%m-%d %H:%M:%S" + printf "$1\n\n" +} + +check_os () { + # detect distribution + # to add other distributions simply create a test case with installation commands + if [ -f /etc/redhat-release ]; then + install_cmd="yum -y install" + log_cmd "installation command: $install_cmd" + host_os="rhel" + elif [ -f /etc/debian_version ]; then + install_cmd="apt-get install -yq" + log_cmd "installation command: $install_cmd" + host_os="debian" + apt-get update + fi +} + +check_root () { + # checking for root as many of these functions interact with system owned directories + if [[ "$EUID" -ne 0 ]]; then + log_cmd "Non root user detected, Please run as root or with sudo" + exit 1 + fi +} + +check_bin () { + # check_bin can be used to verify if a certain binary is already installed + + for item in "$@"; do + if type ${item} >/dev/null 2>&1; then + log_cmd "${item} found" + else + missing_dep+=(${item}) + fi + done +} + +install_pkg () { + # if no parameters this will simply install any $missing_deps + # if any parameters provided they will be added to $missing_dep + + if [[ "$@" ]]; then + for item in "$@"; do + missing_dep+=(${item}) + done + fi + + if [[ "${missing_dep[@]}" ]]; then + log_cmd "installing ${missing_dep[@]}" + ${install_cmd} ${missing_dep[@]} + 
unset missing_dep[*]
+    fi
+}
+
+sbt_install () {
+    if type sbt >/dev/null 2>&1; then
+        log_cmd "sbt found"
+    else
+        log_cmd "installing sbt for ${host_os}"
+        if [[ ${host_os} == 'debian' ]]; then
+            echo "deb https://dl.bintray.com/sbt/debian /" | tee -a /etc/apt/sources.list.d/sbt.list
+            apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823
+            apt-get update
+            apt-get install -y sbt
+        elif [[ ${host_os} == 'rhel' ]]; then
+            curl https://bintray.com/sbt/rpm/rpm | tee /etc/yum.repos.d/bintray-sbt-rpm.repo
+            yum -y install sbt
+        fi
+    fi
+}
+
+
+# end functions
+
+check_os
+check_root
+
+# check basic dependencies
+check_bin ${dependencies[@]}
+install_pkg
+
+sbt_install
+
+# build
+log_cmd 'assembling spot-ml jar'
+sbt assembly
+
+log_cmd "spot-ml dependencies installed"
+
+# post build
+log_cmd "copying generated files to /opt/spot/"
+cp ./target/scala-2.10/spot-ml-assembly-*.jar ${install_path}/jar/
+cp ./ml_ops.sh ${install_path}/bin
+
+log_cmd "spot-ml dependencies installed"
diff --git a/spot-ml/ml_ops.sh b/spot-ml/ml_ops.sh
index 7bf98d75..84d7be62 100755
--- a/spot-ml/ml_ops.sh
+++ b/spot-ml/ml_ops.sh
@@ -94,7 +94,7 @@ time spark-submit --class "org.apache.spot.SuspiciousConnects" \
   --conf spark.kryoserializer.buffer.max=512m \
   --conf spark.yarn.am.waitTime=100s \
   --conf spark.yarn.am.memoryOverhead=${SPK_DRIVER_MEM_OVERHEAD} \
-  --conf spark.yarn.executor.memoryOverhead=${SPK_EXEC_MEM_OVERHEAD} target/scala-2.10/spot-ml-assembly-1.1.jar \
+  --conf spark.yarn.executor.memoryOverhead=${SPK_EXEC_MEM_OVERHEAD} /opt/spot/jar/spot-ml-assembly-1.1.jar \
   --analysis ${DSOURCE} \
   --input ${RAWDATA_PATH} \
   --dupfactor ${DUPFACTOR} \
diff --git a/spot-oa/README.md b/spot-oa/README.md
index ee9b85ba..8273311f 100644
--- a/spot-oa/README.md
+++ b/spot-oa/README.md
@@ -10,13 +10,16 @@ Some of the technologies used are:
 - [Bootstrap](http://getbootstrap.com/)
 - [ReactJS](https://facebook.github.io/react/)
 
-** For more specific requirements, please refer to each specific pipeline readme file before running OA.*
+** For more specific requirements, please refer to each specific pipeline readme file before running OA. **
 
 ----------
 
 ## **Installation**
 
-1. Install python dependencies `pip install -r requirements.txt`
-2. Install UI requirements and build UI following the steps from [here](ui/INSTALL.md)
+run `sudo ./install.sh`
+
+* If your environment requires proxies we recommend using `sudo -E ./install.sh`
+* Installs [NPM](https://docs.npmjs.com/cli/install), [Python PIP](https://pip.pypa.io/en/stable/)
+
 
 ## **Folder Structure**
diff --git a/spot-oa/install.sh b/spot-oa/install.sh
new file mode 100755
index 00000000..d9ae2ba3
--- /dev/null
+++ b/spot-oa/install.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+local_path=`pwd`
+source_path=/tmp/oa_src
+install_path=/opt/spot/
+dependencies=(curl wget screen python)
+missing_dep=()
+wget_cmd="wget -nc"
+host_os=""
+
+# functions
+
+log_cmd () {
+
+    printf "\n****SPOT.OA.install.sh****\n"
+    date +"%y-%m-%d %H:%M:%S"
+    printf "$1\n\n"
+}
+
+check_os () {
+    # detect distribution
+    # to add other distributions simply create a test case with installation commands
+    if [ -f /etc/redhat-release ]; then
+        install_cmd="yum -y install"
+        log_cmd "installation command: $install_cmd"
+        host_os="rhel"
+    elif [ -f /etc/debian_version ]; then
+        install_cmd="apt-get install -yq"
+        log_cmd "installation command: $install_cmd"
+        host_os="debian"
+        apt-get update
+    fi
+}
+
+check_root () {
+    # checking for root as many of these functions interact with system owned directories
+    if [[ "$EUID" -ne 0 ]]; then
+        log_cmd "Non root user detected, Please run as root or with sudo"
+        exit 1
+    fi
+}
+
+cleanup () {
+    log_cmd "executing cleanup"
+    rm -rf ${source_path}
+}
+
+check_bin () {
+    # check_bin can be used to verify if a certain binary is already installed
+
+    for item in "$@"; do
+        if type ${item} >/dev/null 2>&1; then
+            log_cmd "${item} found"
+        else
+            missing_dep+=(${item})
+        fi
+    done
+}
+
+install_pkg () {
+    # if no parameters this will simply install any $missing_deps
+    # if any parameters provided they will be added to $missing_dep
+
+    if [[ "$@" ]]; then
+        for item in "$@"; do
+            missing_dep+=(${item})
+        done
+    fi
+
+    if [[ "${missing_dep[@]}" ]]; then
+        log_cmd "installing ${missing_dep[@]}"
+        ${install_cmd} ${missing_dep[@]}
+        unset missing_dep[*]
+    fi
+}
+
+install_pip () {
+    if type pip >/dev/null 2>&1; then
+        log_cmd "pip found"
+    else
+        log_cmd "missing pip"
+        ${wget_cmd} https://bootstrap.pypa.io/get-pip.py -P ${source_path}/
+        python ${source_path}/get-pip.py
+        log_cmd "pip installed"
+    fi
+}
+
+install_npm () {
+
+    log_cmd "installing NodeJS 7 for ${host_os}"
+    if [[ ${host_os} == 'debian' ]]; then
+        curl -sL https://deb.nodesource.com/setup_7.x | bash -
+        apt-get install -y nodejs
+
+    elif [[ ${host_os} == 'rhel' ]]; then
+        curl -sL https://rpm.nodesource.com/setup_7.x | bash -
+        yum install -y nodejs
+    fi
+}
+# end functions
+
+check_os
+check_root
+
+if [ ! -d ${source_path} ]; then
+    mkdir ${source_path}
+fi
+
+if [ ! -d ${install_path} ]; then
+    log_cmd "${install_path} not created, Please run spot-setup/install.sh first"
+    exit 1
+fi
+
+# check basic dependencies
+check_bin ${dependencies[@]}
+install_pkg
+
+install_pip
+
+if [ -f ${local_path}/requirements.txt ]; then
+    pip install -r requirements.txt
+fi
+
+install_npm
+
+# build ui
+cd ui
+npm install
+
+log_cmd "spot-oa dependencies installed"
+cleanup
diff --git a/spot-setup/README.md b/spot-setup/README.md
index 1ac02f24..8b5130d6 100644
--- a/spot-setup/README.md
+++ b/spot-setup/README.md
@@ -20,6 +20,10 @@ To collaborate and run spot-setup, it is required the following prerequisites:
 
 The main script in the repository is **hdfs_setup.sh** which is responsible of loading environment variables, creating folders in Hadoop for the different use cases (flow, DNS or Proxy), create the Hive database, and finally execute hive query scripts that creates Hive tables needed to access netflow, dns and proxy data.
 
+**install.sh** will add some default directories such as /opt/spot/ for installing known dependencies, when running `install.sh` you should do so with root access or via `sudo`
+The first parameter will be the default user who owns /opt/spot.
+Example: `sudo ./install.sh spot`
+
 ## Environment Variables
 
 **spot.conf** is the file storing the variables needed during the installation process including node assignment, User interface, Machine Learning and Ingest gateway nodes.
@@ -28,6 +32,20 @@ This file also contains sources desired to be installed as part of Apache Spot,
 
 To read more about these variables, please review the [wiki] (https://github.com/Open-Network-Insight/open-network-insight/wiki/Edit%20Solution%20Configuration).
 
+## Install
+
+1. run `sudo ./install.sh`
+2. run `./hdfs_setup.sh`
+
+**install.sh**
+
+* Creates /opt/spot/ to store common spot applications
+* Creates /etc/profile.d/spot.sh for spot specific environment variables
+
+**hdfs_setup.sh**
+
+* Depending on your Apache Hadoop environment you may need to use sudo -u i.e. `sudo -u hdfs ./hdfs_setup.sh`
+
 ## Database Query Scripts
 
 spot-setup contains a script per use case, as of today, there is a table creation script for each DNS, flow and Proxy data.
diff --git a/spot-setup/install.sh b/spot-setup/install.sh
new file mode 100755
index 00000000..0f271f95
--- /dev/null
+++ b/spot-setup/install.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Create local directories for Apache Spot (Incubating)
+# This script needs to be run with root privileges or it will fail to create the /opt/ directories
+
+spot_dir=/opt/spot
+spot_jar=${spot_dir}/jar
+spot_bin=${spot_dir}/bin
+spot_env="export PATH=\$PATH:${spot_bin}"
+spot_env_file=/etc/profile.d/spot.sh
+spot_user=$1
+
+# functions
+
+log_cmd () {
+
+    printf "\n****SPOT.SETUP.install.sh****\n"
+    date +"%y-%m-%d %H:%M:%S"
+    printf "$1\n\n"
+
+}
+
+create_dir () {
+    if [[ -d "${1}" ]]; then
+        log_cmd "${1} already exists"
+    else
+        log_cmd "Creating ${1}"
+        mkdir ${1}
+    fi
+}
+
+check_root () {
+    # checking for root as many of these functions interact with system owned directories
+    if [[ "$EUID" -ne 0 ]]; then
+
+        log_cmd "Non root user detected, Please run as root or with sudo"
+        exit 1
+
+    fi
+}
+
+set_env () {
+    # add directory to env
+    if [[ -f "${spot_env_file}" ]]; then
+        log_cmd "${spot_env_file} already exists"
+    else
+        log_cmd "Creating ${spot_env_file}"
+        echo ${spot_env} >> ${spot_env_file}
+    fi
+    source ${spot_env_file}
+}
+
+check_root
+
+# make spot directories
+create_dir ${spot_dir}
+create_dir ${spot_bin}
+create_dir ${spot_jar}
+
+set_env
+
+log_cmd "spot-setup complete"