
User Data Synchronization



The file name carries today's date, while the data it contains is yesterday's. The script therefore uses dayid (yesterday) as the data day and filedayid = dayid + 1 day (today) to match the file name. Full synchronization job:
dayid=`date -d "1 days ago" +%Y%m%d`
##############################################
# Function: return the date string obtained by adding an offset
#           (in days) to a start date
# Args:     start date, yyyymmdd
#           offset in days
# Returns:  start date plus the offset, yyyymmdd
##############################################
function toDate()
{
   startdate=$1
   days=$2
   timestamp_startdate=`date -d ${startdate} +%s`
   timestamp_resultdate=`expr ${timestamp_startdate} '+' ${days} '*' 86400`
   resultdate=`date -d @${timestamp_resultdate} +%Y%m%d`
   echo $resultdate
}
filedayid=`toDate $dayid 1`
spark-submit --class="com.zyuc.stat.iot.etl.UserInfoETL" \
--master yarn-client \
--name UserInfoETL \
--conf "spark.app.appName=UserInfoETL" \
--conf "spark.app.dataDayid=${dayid}" \
--conf "spark.app.userTable=iot_customer_userinfo" \
--conf "spark.app.syncType=full" \
--conf "spark.app.inputPath=/hadoop/IOT/ANALY_PLATFORM/BasicData/UserInfo/" \
--conf "spark.app.outputPath=/hadoop/IOT/ANALY_PLATFORM/BasicData/output/UserInfo/" \
--conf "spark.app.fileWildcard=all_userinfo_qureyes_${filedayid}*" \
--conf "spark.app.vpdnInput=/hadoop/IOT/ANALY_PLATFORM/BasicData/VPDNProvince/" \
--conf "spark.app.vpdnWildcard=vpdninfo.txt" \
--conf spark.yarn.executor.memoryOverhead=700 \
--executor-memory 2G \
--executor-cores 1 \
--num-executors 6 \
/slview/test/zcw/jars/userETL.jar
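A quick sanity check of the date arithmetic (the values shown assume the job runs on 2017-07-15, so the data day is 20170714):

dayid=`date -d "1 days ago" +%Y%m%d`   # e.g. 20170714 (data day)
filedayid=`toDate $dayid 1`            # e.g. 20170715 (file-name day)

# With GNU date (already required by the date -d calls above),
# the same offset can be computed in a single call, without expr:
filedayid=`date -d "${dayid} 1 days" +%Y%m%d`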




The incremental synchronization job is identical except for two parameters: spark.app.syncType=incr and the file wildcard incr_userinfo_qureyes_${filedayid}*. The complete incremental script, as used for scheduling, is shown at the end of this section.

Example values for spark.app.fileWildcard:

all_userinfo_qureyes_20170714*     (full sync)
incr_userinfo_qureyes_20170715*    (incremental sync)
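Before launching a job, the wildcard can be expanded directly against HDFS to confirm which input files it matches (standard HDFS client commands; the paths are the input directories from the jobs above):

hdfs dfs -ls /hadoop/IOT/ANALY_PLATFORM/BasicData/UserInfo/all_userinfo_qureyes_20170714*
hdfs dfs -ls /hadoop/IOT/ANALY_PLATFORM/BasicData/UserInfo/incr_userinfo_qureyes_20170715*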



Hive table for the synchronized user data:

create table iot_customer_userinfo(
    vpdncompanycode string,
    mdn string,
    imsicdma string,
    imsilte string,
    iccid string,
    imei string,
    company string,
    nettype string,
    vpdndomain string,
    isvpdn string,
    subscribetimeaaa string,
    subscribetimehlr string,
    subscribetimehss string,
    subscribetimepcrf string,
    firstactivetime string,
    userstatus string,
    atrbprovince string,
    userprovince string,
    crttime string,
    custProvince string
)
partitioned by (d int)
stored as orc
location '/hadoop/IOT/ANALY_PLATFORM/BasicData/output/UserInfo/data/';
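To confirm the table definition and see which day partitions have been registered so far (standard Hive statements, run here via the CLI):

hive -e "describe formatted iot_customer_userinfo"
hive -e "show partitions iot_customer_userinfo"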

alter table iot_customer_userinfo add IF NOT EXISTS partition(d='20170714');
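Partition registration can be folded into the daily scheduling script so that each run registers the day it just wrote; a minimal sketch, assuming the hive CLI is available on the scheduling host:

# Register the data day's partition after the Spark job has finished
hive -e "alter table iot_customer_userinfo add IF NOT EXISTS partition(d='${dayid}')"

# Spot-check the freshly loaded partition (the column list is illustrative)
hive -e "select mdn, vpdncompanycode, userstatus from iot_customer_userinfo where d=${dayid} limit 10"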

Shell scheduling script for the incremental data:
$ cat userETL.sh 
dayid=$1
if [ -z "$dayid" ] ; then
    dayid=`date -d "1 days ago" "+%Y%m%d"`
fi
##############################################
# Function: return the date string obtained by adding an offset
#           (in days) to a start date
# Args:     start date, yyyymmdd
#           offset in days
# Returns:  start date plus the offset, yyyymmdd
###############################################
function toDate()
{
   startdate=$1;
   days=$2;
   timestamp_startdate=`date -d ${startdate} +%s`
   timestamp_resultdate=`expr ${timestamp_startdate} '+' ${days} '*' 86400`
   resultdate=`date -d @${timestamp_resultdate} +%Y%m%d`
   echo $resultdate
}
filedayid=`toDate $dayid 1` 
spark-submit --class="com.zyuc.stat.iot.etl.UserInfoETL" \
--master yarn-client \
--name UserInfoETL \
--conf "spark.app.appName=UserInfoETL" \
--conf "spark.app.dataDayid=${dayid}" \
--conf "spark.app.userTable=iot_customer_userinfo" \
--conf "spark.app.syncType=incr" \
--conf "spark.app.inputPath=/hadoop/IOT/ANALY_PLATFORM/BasicData/UserInfo/" \
--conf "spark.app.outputPath=/hadoop/IOT/ANALY_PLATFORM/BasicData/output/UserInfo/" \
--conf "spark.app.fileWildcard=incr_userinfo_qureyes_${filedayid}*" \
--conf "spark.app.vpdnInput=/hadoop/IOT/ANALY_PLATFORM/BasicData/VPDNProvince/" \
--conf "spark.app.vpdnWildcard=vpdninfo.txt" \
--conf spark.yarn.executor.memoryOverhead=700 \
--executor-memory 2G \
--executor-cores 1 \
--num-executors 6 \
/slview/test/zcw/shell/userinfo/jars/userETL.jar > /slview/test/zcw/shell/userinfo/logs/${dayid}.log 2>&1
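The script takes an optional data day as its first argument and defaults to yesterday, so the same file serves both daily scheduling and manual backfill. A possible crontab entry (the 02:00 run time is an assumption for illustration, not from the original setup):

# Daily incremental sync at 02:00
0 2 * * * /slview/test/zcw/shell/userinfo/userETL.sh

# Manual backfill for a specific data day
sh /slview/test/zcw/shell/userinfo/userETL.sh 20170713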