工作背景:利用 shell script 和 curl,在指定的网址上下载数据。本文中指定的日期范围是 2022-03-01 到 2022-06-01,每隔三天形成一个文件,具体代码如下:
#!/bin/bash
# Base curl command line shared by every request. It is kept as a single
# string (with the header values wrapped in single quotes) because it is later
# extended with "-o <file> <url>" and handed to a shell for execution.
#
# The session cookie can be supplied through the TH_AUTH_ONLINE environment
# variable; when it is unset or empty the value below is used, which yields
# exactly the same command string as before.
# The cookie must not contain a single quote: it is embedded inside a
# single-quoted -H argument that a shell re-parses later.
#
# NOTE(review): --insecure disables TLS certificate verification.
# NOTE(review): 'Accept-Encoding: gzip, deflate, br' is sent without curl's
#   --compressed option, so a compressed response would be written to disk
#   undecoded - confirm the server answers with plain JSON.
# NOTE(review): the Host value '***.**' looks redacted - confirm the real host.
auth_cookie="${TH_AUTH_ONLINE:-MTY1Nzg3MzIzOHxOd3dBTkZwS04wbGFTRmxWU0ZOYVExRklXVVpKUlVoQlFWTlRWelkxVWtSYVJsSlFSa1ZXTWs5U1RsWktVVk5RTTFwTVYwMURORUU9fC5OMQT5IFr5nL7LOBNyw4HwIhsXKJRI9Z_6Uzrr6abT}"
mycurl="curl --insecure"
mycurl+=" -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'"
mycurl+=" -H 'Accept-Encoding: gzip, deflate, br'"
mycurl+=" -H 'Accept-Language: zh-CN,zh;q=0.9'"
mycurl+=" -H 'Cache-Control: max-age=0'"
mycurl+=" -H 'Connection: keep-alive'"
mycurl+=" -H 'Cookie: TH_AUTH_ONLINE=${auth_cookie}'"
mycurl+=" -H 'Host: ***.**'"
mycurl+=" -H 'Sec-Fetch-Dest: document'"
mycurl+=" -H 'Sec-Fetch-Mode: navigate'"
mycurl+=" -H 'Sec-Fetch-Site: none'"
mycurl+=" -H 'Sec-Fetch-User: ?1'"
mycurl+=" -H 'Upgrade-Insecure-Requests: 1'"
mycurl+=" -H 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'"
# Build datearray: every step_days-th calendar date from startDate up to and
# including endDate, formatted as YYYY-MM-DD. Neighbouring entries are used
# later as the bounds of one download window.
#
# All arithmetic is done in UTC (date -u) so that a step is always an exact
# multiple of 86400 seconds; in local time a DST change inside the range could
# move a step onto the wrong calendar day. startSec/endSec are therefore UTC
# epoch values.
#
# When the range length is not a multiple of step_days the last entry falls
# before endDate (2022-05-30 for the values below).
startDate=20220301
endDate=20220601
step_days=3
startSec=$(date -u -d "$startDate" '+%s')
endSec=$(date -u -d "$endDate" '+%s')
datearray=()
for ((i = startSec; i <= endSec; i += step_days * 86400)); do
  datearray+=("$(date -u -d "@$i" '+%Y-%m-%d')")
done
#echo ${datearray[@]}
# Download loop.
#
# For every pair of neighbouring dates in datearray and every host
# 192.168.103.160 .. 192.168.103.175, one JSON file named
# <ipnum>_<start>_<end>_page1.json is fetched. The first window starts on its
# own start date; every later window starts one day after it, so a day is not
# requested twice by two consecutive windows.
#
# Reads:  mycurl (curl command prefix, a shell-quoted string), datearray.
# Env:    MAX_RETRIES - maximum number of re-fetches per file; 0 (the default)
#         keeps retrying until the marker shows up.
#
# NOTE(review): the previous loop bound was ${#datearray[@]}-2, which never
# processed the last pair of dates; the bound below covers every pair.
ipprefix='192.168.103.'
max_retries="${MAX_RETRIES:-0}"
# A response counts as complete once it contains this literal text.
load_marker='"load_all":true'

# Run a curl command line until its output file contains load_marker.
# Arguments: $1 - complete curl command line (executed by sh)
#            $2 - file the command writes to
# Returns:   0 once the marker is found, 1 when max_retries is exhausted.
fetch_until_loaded() {
  local cmd=$1 out_file=$2
  local retries=0
  while true; do
    # The command string carries its own quoting, so a shell has to parse it;
    # printf with a quoted argument passes it on without glob/word-split damage.
    printf '%s\n' "$cmd" | sh
    if [[ -f "$out_file" ]] && grep -qF -- "$load_marker" "$out_file"; then
      return 0
    fi
    if ((max_retries > 0 && retries >= max_retries)); then
      printf 'giving up on %s after %d retries\n' "$out_file" "$retries" >&2
      return 1
    fi
    retries=$((retries + 1))
    echo "curl is running"
    # Pause before every retry so the server is not polled back-to-back.
    sleep 5s
  done
}

for ((m = 0; m < ${#datearray[@]} - 1; m += 1)); do
  start_time=${datearray[m]}
  end_time=${datearray[m + 1]}
  # Day after start_time, computed in UTC so DST cannot shift the result.
  start_time2=$(date -u -d "${start_time} +1 day" '+%Y-%m-%d')
  # Only the very first window includes its own start date.
  if ((m == 0)); then
    query_start=$start_time
  else
    query_start=$start_time2
  fi
  #echo "start_time:$start_time start_time2:$start_time2 end_time:$end_time"
  for ((ipnum = 160; ipnum <= 175; ipnum += 1)); do
    ipstr="${ipprefix}${ipnum}"
    echo "iptarget:${ipstr}"
    echo "curl is running,iptarget:${ipstr},start_time:${query_start},end_time:${end_time}"
    outfile="${ipnum}_${query_start}_${end_time}_page1.json"
    curlcmd="${mycurl} -o ${outfile} \"https://wuji.su/api/bigdata/v1/ip/process?page=0&size=10000&start_time=${query_start}&end_time=${end_time}&condhash=&ip=${ipstr}\""
    # Log the exact command (note: this prints the session cookie as well).
    printf '%s\n' "$curlcmd"
    # A failure is already reported on stderr by the function; move on.
    fetch_until_loaded "$curlcmd" "$outfile"
  done
done
代码前半部分是将日期写入一个数组,用于计算起始和结束日期
# Excerpt of the main loop: derive the bounds of download window number m.
# Two neighbouring entries of datearray are the window's start and end dates.
start_time=${datearray[$m]}
end_time=${datearray[$m+1]}
# Convert the start date to epoch seconds, add 86400 s (one day) and format
# the result back as YYYY-MM-DD; start_time2 is thus the day after start_time.
start_time_sec=`date -d $start_time +%s`
let start_time2_sec=start_time_sec+86400
start_time2=`date -d "@$start_time2_sec" "+%Y-%m-%d"`
这段代码用于确定每次下载的时间区间:取数组中相邻的两个日期作为起始和结束日期(本文中间隔为三天),并把起始日期加一天得到 start_time2,供第一个区间之外的请求作为起始日期使用。