
Linux backup: deleting files outside a specified date range

The script below keeps the most recent bloom-filter directory, uploads it to HDFS, and then prunes local directories, HDFS filters, and tdid files that fall outside 7-day and 30-day retention windows (month-end snapshots are kept).

#!/usr/bin/env bash
source /etc/profile
echo " *************** start filter ***************  " # get befor six month last day #m0=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d) #echo ${m0} #m1=$(date -d "$(date -d '0 month' +%Y%m01) -1 day" +%Y%m%d)
#echo ${m1} #m2=$(date -d "$(date -d last-month +%Y%m01) -1 day" +%Y%m%d) #echo ${m2} #m3=$(date -d "$(date -d ${m2} +%Y%m01) -1 day" +%Y%m%d) #echo ${m3} #m4=$(date -d "$(date -d ${m3} +%Y%m01) -1 day" +%Y%m%d) #echo ${m4} #m5=$(date -d "$(date -d ${m4} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m5} #m6=$(date -d "$(date -d ${m5} +%Y%m01) -1 day" +%Y%m%d) #echo ${m6}
# Last day of the current month. The array length can be read with ${#m[*]} or ${#m[@]}.
m[0]=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)
echo m0 : ${m[0]} ' month : ' ${#m[@]}
# Walk backwards: m[n+1] is the last day of the month preceding m[n],
# which builds the 13 most recent month-end dates.
for n in $(seq 0 11); do
    m[$n+1]=$(date -d "$(date -d ${m[$n]} +%Y%m01) -1 day" +%Y%m%d)
    echo m$(($n+1)) : ${m[$n+1]} ' month : ' ${#m[*]}
done
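# A quick sanity check of the month-end arithmetic above (GNU date assumed;
# the sample date is hypothetical). Formatting a date as %Y%m01 pins it to
# the first of its month, and subtracting one day lands on the last day of
# the previous month:
#   date -d "$(date -d 20240315 +%Y%m01) -1 day" +%Y%m%d   # -> 20240229
# Likewise, a bare 'month' means "one month from now", so "first of next
# month minus one day" is the last day of the current month.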
echo " ****** time : " $(date ' +%Y-%m-%d %H:%M:%S ' ) " ****** "
max_date=0
# get the latest file and copy to hdfs
cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter
for dir in $(ls -l ./ | awk '/^d/{print $NF}')
do
   if [[ -d $dir && $dir == *\_* ]]; then
      f_d=$(echo $dir | cut -d \_ -f 3 | cut -d \. -f 1)
      if [[ $max_date < $f_d ]]; then
        max_date=$f_d
        max_filter=$dir
      fi
   fi
done
echo " max date is : "$max_date
echo " max filter is : "$max_filter
pwd
# copy the most recent filter file to HDFS
hadoop fs -test -e /data/datacenter/run_center_spark_stream/bloom_filters/$max_filter
if [[ $? == 0 ]]; then
    echo " filter already exists : "$max_filter
else
    echo " start hdfs copy "
    echo " ****** start time : "$(date '+%Y-%m-%d %H:%M:%S')" ****** "
    hadoop fs -put $max_filter /data/datacenter/run_center_spark_stream/bloom_filters
    echo " ****** end time : "$(date '+%Y-%m-%d %H:%M:%S')" ****** "
fi
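# The existence test above keys off the exit status of hadoop fs -test -e
# (0 when the path exists). An equivalent, slightly tighter spelling that
# avoids inspecting $? separately would be:
#   if hadoop fs -test -e "/data/datacenter/run_center_spark_stream/bloom_filters/$max_filter"; then
#       echo " filter already exists : $max_filter"
#   else
#       hadoop fs -put "$max_filter" /data/datacenter/run_center_spark_stream/bloom_filters
#   fi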
remove_week=$(date -d "$max_date 7 days ago" +%Y%m%d)
echo " cutoff date for removing local serialized files : "$remove_week
remove_date=$(date -d "$max_date 30 days ago" +%Y%m%d)
echo " cutoff date for removing files and Hadoop filters : "$remove_date
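# How the two cutoffs fall out for a hypothetical max_date of 20240315,
# using GNU date's "N days ago" syntax:
#   date -d "20240315 7 days ago" +%Y%m%d    # -> 20240308 (remove_week)
#   date -d "20240315 30 days ago" +%Y%m%d   # -> 20240214 (remove_date)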
echo " *************** start remove filter ***************  " for r_dir in $( ls -l ./ | awk ' /^d/{print $NF} ' ) do    if [[ -d $r_dir && $r_dir == *\_* ]] ; then       r_d = $( echo $r_dir | cut -d \_ -f 3 | cut -d \. -f 1 )       if [[ $r_d < $remove_date ]] ; then           if [[ ${m[ * ]} == * $r_d * ]] ; then               cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter/ $r_dir               pwd               for f_dir in $( ls *)               do                  if [[ " $f_dir " == "mau_device_all.FILTER.SER" ]] ; then                     echo " ------ keep mau_filter is: " $f_dir ;                  else                     echo " remove file is: " $f_dir ;                     rm -r $f_dir                  fi               done               cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter               pwd           else               echo " remove filter_dir is: " $r_dir               rm -r $r_dir           fi       elif [[ $r_d < $remove_week ]] ; then           if [[ $r_d == $m0 || $r_d == $m1 || $r_d == $m2 ]] ; then               cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter/ $r_dir               pwd               for f_dir in $( ls *)               do                  if [[ " $f_dir " == "mau_device_all.FILTER.SER" ]] ; then                     echo " ------ week keep mau_filter is: " $f_dir ;                  else                     if [[ " $f_dir " == *.FILTER.SER ]] ; then                         echo " - last day of month - week remove file is: " $f_dir ;                         rm -r $f_dir                     fi                  fi               done               cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter               pwd           else               echo " week remove filter is: " $r_dir               rm -r $r_dir /*.FILTER.SER           fi       fi    fi done
echo " =============== start remove hdfs filter ===============  " # 刪除hdfs上指定日期外的tdid for h_filter in $(hadoop fs -ls /data/datacenter/run_center_spark_stream/bloom_filters | awk ' {print $8} ' ) do     if [[ $h_filter == *\_* ]] ; then         h_date = $( echo $h_filter | cut -d \/ -f 6 | cut -d \_ -f 3 | cut -d \. -f 1 ) #        echo " hdfs date : "$h_date #        echo " hdfs filter : "$h_filter         if [[ ${m[ * ]} == * $h_date * ]] ; then             echo " remain hdfs filter is : " $h_filter         elif [[ $h_date < $remove_date ]] ; then             echo " not remain date is : " $h_date             echo " remove hdfs filter is : " $h_filter             hadoop fs -rmr $h_filter         fi     fi done
echo " -------------- start tdid ---------------  " # 刪除小於30天的tdid cd /home/hadoop/streaming_run_center/tmp/checkpoint/tdidinfo for tdid in $( ls *) do     if [[ $tdid == *\_* ]] ; then         t_d = $( echo $tdid | cut -d \_ -f 2 | cut -d \. -f 1 )         if [[ $t_d == $max_date || $t_d > $max_date ]] ; then             echo " need copy date : " $t_d             echo " need copy tdid : " $tdid             # 檢查tdid是否存在 #            hadoop fs -test -e jiaojiao/tdid/$tdid #            if [[ $? == 0 ]]; then #                echo " tdid is already exist,remove it first " #                hadoop fs -rm jiaojiao/tdid/$tdid #                hadoop fs -put $tdid jiaojiao/tdid #            else #                echo " start copy " #                hadoop fs -put $tdid jiaojiao/tdid #            fi         elif [[ $t_d < $remove_date ]] ; then             echo " remove tdid : " $tdid             rm $tdid         fi     fi done
#echo " =============== start remove hdfs tdid ===============  " #for h_tdid in $(hadoop fs -ls jiaojiao/tdid | awk '{print $8}') #do #    if [[ $h_tdid == *\_* ]]; then #        h_date=$(echo $h_tdid | cut -d \_ -f 2 | cut -d \. -f 1) #        echo $h_date #        echo $h_tdid #    fi #done