1. 程式人生 > >Spark 引用ProtoBuffer報錯:Unable to find proto buffer class

Spark 引用ProtoBuffer報錯:Unable to find proto buffer class

使用 yarn 的 spark-submit 提交時,加入如下序列化引數:

--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \

另,附:Spark-submit提交作業的shell版本模板。

一個好的shell指令碼,不僅讓人看起來賞心悅目,更易於查詢問題,方便交接

#!/bin/sh

## Script description as a no-op heredoc.
## The original `>>descripe` redirected to a file named "descripe" and then
## executed the description lines as shell commands; feeding the heredoc to
## the `:` builtin makes the text inert.
: <<'descripe'
shell template
@author zjf
@date 2018-05-28
descripe

## Source environment files (guarded so a missing file is not an error).
[ -f /etc/profile ] && . /etc/profile
[ -f ~/.bash_profile ] && . ~/.bash_profile

## Enable GC logging for the spark-submit JVM (driver side).
## NOTE(review): -Xloggc:tmp/gc_log is relative to the current working
## directory — confirm an absolute path such as /tmp/gc_log was not intended.
export SPARK_SUBMIT_OPTS=" -Xloggc:tmp/gc_log -XX:+PrintGCApplicationStoppedTime -XX:+PrintGCDetails -XX:+PrintGCDateStamps -verbose:gc -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M "

## Argument check: derive the date/partition variables used downstream.
##  - no args : every date defaults to yesterday
##  - one arg : an 8-digit day such as 20170718 (an optional trailing
##              2-digit hour, e.g. 2017071800, is also tolerated)
##  - otherwise: usage error
if [ $# -eq 0 ]
then
        CUR_DATE=$(date -d "-1 day" +%Y-%m-%d)
        CUR_PARTITION=$(date -d "-1 day" +%Y%m%d)
        BEGIN_PARTITION=$CUR_PARTITION
        END_PARTITION=$CUR_PARTITION
        BEGIN_DATE=$CUR_DATE
        END_DATE=$CUR_DATE
elif [ $# -eq 1 ]
then
        ## Extract the 8-digit day and the optional trailing 2-digit hour.
        format_day=$(echo "$1" | grep -o '[0-9]\{8\}')
        format_hour=$(echo "$1" | grep -o '[0-9]\{2\}$')   # NOTE(review): not used below — confirm it is needed
        ## Reject malformed input: `date -d ""` would silently mean "today",
        ## so an unvalidated argument would run the job for the wrong day.
        if [ -z "$format_day" ]
        then
                echo "the args is wrong ,you should give it like '20170718'"
                exit 1
        fi
        CUR_DATE=$(date -d "$format_day" +%Y-%m-%d)
        CUR_PARTITION=$(date -d "$format_day" +%Y%m%d)
        LAST_MONTH=$(date -d "${format_day} -1 month" +%Y%m)
        BEGIN_DATE=$CUR_DATE
        END_DATE=$CUR_DATE
        BEGIN_PARTITION=$CUR_PARTITION
        END_PARTITION=$CUR_PARTITION
else
        echo "the args is wrong ,you should give it like '20170718'"
        exit 1
fi

## Resolve the script's own directory and derive the working paths.
## Quoted, and `&&` instead of `;` so a failed cd cannot silently
## leave WORK_DIR pointing at the caller's cwd.
WORK_DIR=$(cd "$(dirname "$0")" && pwd)
BASH_DIR="${WORK_DIR}/bin"
DATA_DIR="${WORK_DIR}/data/${CUR_PARTITION}"
LOGS_DIR="${WORK_DIR}/log/${CUR_PARTITION}"

## Print every resolved date variable so the job log records the run's scope.
echo "CUR_DATE=${CUR_DATE}"
echo "CUR_PARTITION=${CUR_PARTITION}"
echo "LAST_MONTH=${LAST_MONTH}"
echo "BEGIN_PARTITION=${BEGIN_PARTITION}"
echo "END_PARTITION=${END_PARTITION}"
echo "BEGIN_DATE=${BEGIN_DATE}"
echo "END_DATE=${END_DATE}"

## NOTE(review): COMMAND reuses $1, which above is parsed as a date —
## confirm the batch argument and the date argument are meant to be the same.
COMMAND="$1"
echo "RUN BATCH : ${COMMAND}"

## Fully-qualified main class of the Spark job.
class_home="cn.com.xiaomi.Test"

## Submit the job on YARN in cluster mode.
## KryoSerializer is set to avoid "Unable to find proto buffer class"
## when shipping protobuf-generated objects (see article above).
## NOTE(review): '--cluster' is not a stock spark-submit flag (vendor fork
## of spark-submit) — keep only if your distribution supports it.
## NOTE(review): the queue name 'production.queue..miui.game' contains a
## double dot — confirm it is not a typo.
/home/zjf/bin/spark-submit \
        --cluster hadoopprc-hadoop-spark2.1 \
        --conf spark.yarn.job.owners=xxx \
        --conf spark.yarn.alert.phone.number=188888899 \
        --conf spark.yarn.alert.mail.address='[email protected]' \
        --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
        --conf spark.storage.memoryFraction=0.5 \
        --conf spark.shuffle.memoryFraction=0.3 \
        --class "${class_home}" \
        --master yarn \
        --deploy-mode cluster \
        --queue production.queue..miui.game \
        --conf spark.executor.extraJavaOptions="-XX:MaxDirectMemorySize=1024m" \
        --conf spark.default.parallelism=1600 \
        --driver-memory 14g \
        --executor-memory 14g \
        --executor-cores 2 \
        --num-executors 400 \
        "$WORK_DIR/huyu-cdi-spark-1.0-SNAPSHOT.jar" \
        "${COMMAND}"