• Downloading MODIS data by date, time, and latitude/longitude, with batch processing


    1. Batch-downloading MODIS data with Python by date, time, and latitude/longitude

    I wanted to download MODIS data (a product with one granule every 5 minutes) matching the times and locations of some field measurement points, for comparison.

    I had considered several approaches, such as Google Earth Engine (GEE), but the MODIS product I needed is not available on GEE.

    So I eventually turned to this website:

    National Snow and Ice Data Center

    On this site you can search for the product you need to download -- MYD29 in my case. Searching for "MYD29 download" takes you to a product page like the one below (the linked page is for the Terra counterpart, MOD29):

    MODIS/Terra Sea Ice Extent 5-Min L2 Swath 1km, Version 61 | National Snow and Ice Data Center

    The page offers several data access options; choose the second one, the Data Access Tool, and click Get Data.

    Next, take one of your measurement points and enter it into the input boxes on the left; the matching granules then appear on the right. Click

    Download Script to get Python code that downloads the data matching exactly those criteria. The full script is below. Set the username and password used by its functions (or use a .netrc file or bearer token, as described in the script header), then edit the module-level parameters read by main() -- the bounding box (location), the time range, and so on -- to search for and download whatever you need. A short sketch of typical parameter values follows the next paragraph.

    Be warned that the script frequently dies when the network is unstable, so you may need to restart it by hand, or add restart logic in the exception handler (a hedged retry sketch follows the script below). One caveat worth mentioning: a global variable modified inside a function does not keep its new value across a restart; if you changed it, you must pass the new value in again, otherwise after a restart it reverts to the value defined at the top of the file.
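    For reference, here is a minimal sketch of the module-level parameter block you would edit. The values are illustrative, not from my actual run; per the CMR API, bounding_box is 'west,south,east,north' in decimal degrees:

    # Illustrative values only -- replace with your own point and time window.
    short_name = 'MYD29'
    version = '61'
    time_start = '2020-01-01T18:00:00Z'       # UTC start of the search window
    time_end = '2020-01-01T19:00:00Z'         # UTC end of the search window
    bounding_box = '-170.5,62.0,-169.5,63.0'  # 'W,S,E,N' box around the point
    polygon = ''
    filename_filter = ''                      # empty keeps every granule in the box/window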

    #!/usr/bin/env python
    # ----------------------------------------------------------------------------
    # NSIDC Data Download Script
    #
    # Copyright (c) 2023 Regents of the University of Colorado
    # Permission is hereby granted, free of charge, to any person obtaining
    # a copy of this software and associated documentation files (the "Software"),
    # to deal in the Software without restriction, including without limitation
    # the rights to use, copy, modify, merge, publish, distribute, sublicense,
    # and/or sell copies of the Software, and to permit persons to whom the
    # Software is furnished to do so, subject to the following conditions:
    # The above copyright notice and this permission notice shall be included
    # in all copies or substantial portions of the Software.
    #
    # Tested in Python 2.7 and Python 3.4, 3.6, 3.7, 3.8, 3.9
    #
    # To run the script at a Linux, macOS, or Cygwin command-line terminal:
    #   $ python nsidc-data-download.py
    #
    # On Windows, open Start menu -> Run and type cmd. Then type:
    #   python nsidc-data-download.py
    #
    # The script will first search Earthdata for all matching files.
    # You will then be prompted for your Earthdata username/password
    # and the script will download the matching files.
    #
    # If you wish, you may store your Earthdata username/password in a .netrc
    # file in your $HOME directory and the script will automatically attempt to
    # read this file. The .netrc file should have the following format:
    #   machine urs.earthdata.nasa.gov login MYUSERNAME password MYPASSWORD
    # where 'MYUSERNAME' and 'MYPASSWORD' are your Earthdata credentials.
    #
    # Instead of a username/password, you may use an Earthdata bearer token.
    # To construct a bearer token, log into Earthdata and choose "Generate Token".
    # To use the token, when the script prompts for your username,
    # just press Return (Enter). You will then be prompted for your token.
    # You can store your bearer token in the .netrc file in the following format:
    #   machine urs.earthdata.nasa.gov login token password MYBEARERTOKEN
    # where 'MYBEARERTOKEN' is your Earthdata bearer token.
    #
    from __future__ import print_function

    import base64
    import getopt
    import itertools
    import json
    import math
    import netrc
    import os.path
    import ssl
    import sys
    import time
    from getpass import getpass

    try:
        from urllib.parse import urlparse
        from urllib.request import urlopen, Request, build_opener, HTTPCookieProcessor
        from urllib.error import HTTPError, URLError
    except ImportError:
        from urlparse import urlparse
        from urllib2 import urlopen, Request, HTTPError, URLError, build_opener, HTTPCookieProcessor

    short_name = 'MYD29'
    version = '61'
    time_start = '2002-07-04T00:00:00Z'
    time_end = '2023-11-07T04:01:18Z'
    bounding_box = ''
    polygon = ''
    filename_filter = '*MYD29.A2020001.1855.061.2020321085433*'
    url_list = []

    CMR_URL = 'https://cmr.earthdata.nasa.gov'
    URS_URL = 'https://urs.earthdata.nasa.gov'
    CMR_PAGE_SIZE = 2000
    CMR_FILE_URL = ('{0}/search/granules.json?provider=NSIDC_ECS'
                    '&sort_key[]=start_date&sort_key[]=producer_granule_id'
                    '&scroll=true&page_size={1}'.format(CMR_URL, CMR_PAGE_SIZE))


    def get_username():
        username = ''

        # For Python 2/3 compatibility:
        try:
            do_input = raw_input  # noqa
        except NameError:
            do_input = input

        username = do_input('Earthdata username (or press Return to use a bearer token): ')
        return username


    def get_password():
        password = ''
        while not password:
            password = getpass('password: ')
        return password


    def get_token():
        token = ''
        while not token:
            token = getpass('bearer token: ')
        return token


    def get_login_credentials():
        """Get user credentials from .netrc or prompt for input."""
        credentials = None
        token = None

        try:
            info = netrc.netrc()
            username, account, password = info.authenticators(urlparse(URS_URL).hostname)
            if username == 'token':
                token = password
            else:
                credentials = '{0}:{1}'.format(username, password)
                credentials = base64.b64encode(credentials.encode('ascii')).decode('ascii')
        except Exception:
            username = None
            password = None

        if not username:
            username = get_username()
            if len(username):
                password = get_password()
                credentials = '{0}:{1}'.format(username, password)
                credentials = base64.b64encode(credentials.encode('ascii')).decode('ascii')
            else:
                token = get_token()

        return credentials, token


    def build_version_query_params(version):
        desired_pad_length = 3
        if len(version) > desired_pad_length:
            print('Version string too long: "{0}"'.format(version))
            quit()

        version = str(int(version))  # Strip off any leading zeros
        query_params = ''

        while len(version) <= desired_pad_length:
            padded_version = version.zfill(desired_pad_length)
            query_params += '&version={0}'.format(padded_version)
            desired_pad_length -= 1
        return query_params


    def filter_add_wildcards(filter):
        if not filter.startswith('*'):
            filter = '*' + filter
        if not filter.endswith('*'):
            filter = filter + '*'
        return filter


    def build_filename_filter(filename_filter):
        filters = filename_filter.split(',')
        result = '&options[producer_granule_id][pattern]=true'
        for filter in filters:
            result += '&producer_granule_id[]=' + filter_add_wildcards(filter)
        return result


    def build_cmr_query_url(short_name, version, time_start, time_end,
                            bounding_box=None, polygon=None,
                            filename_filter=None):
        params = '&short_name={0}'.format(short_name)
        params += build_version_query_params(version)
        params += '&temporal[]={0},{1}'.format(time_start, time_end)
        if polygon:
            params += '&polygon={0}'.format(polygon)
        elif bounding_box:
            params += '&bounding_box={0}'.format(bounding_box)
        if filename_filter:
            params += build_filename_filter(filename_filter)
        return CMR_FILE_URL + params


    def get_speed(time_elapsed, chunk_size):
        if time_elapsed <= 0:
            return ''
        speed = chunk_size / time_elapsed
        if speed <= 0:
            speed = 1
        size_name = ('', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')
        i = int(math.floor(math.log(speed, 1000)))
        p = math.pow(1000, i)
        return '{0:.1f}{1}B/s'.format(speed / p, size_name[i])


    def output_progress(count, total, status='', bar_len=60):
        if total <= 0:
            return
        fraction = min(max(count / float(total), 0), 1)
        filled_len = int(round(bar_len * fraction))
        percents = int(round(100.0 * fraction))
        bar = '=' * filled_len + ' ' * (bar_len - filled_len)
        fmt = '  [{0}] {1:3d}%  {2}   '.format(bar, percents, status)
        print('\b' * (len(fmt) + 4), end='')  # clears the line
        sys.stdout.write(fmt)
        sys.stdout.flush()


    def cmr_read_in_chunks(file_object, chunk_size=1024 * 1024):
        """Read a file in chunks using a generator. Default chunk size: 1Mb."""
        while True:
            data = file_object.read(chunk_size)
            if not data:
                break
            yield data


    def get_login_response(url, credentials, token):
        opener = build_opener(HTTPCookieProcessor())

        req = Request(url)
        if token:
            req.add_header('Authorization', 'Bearer {0}'.format(token))
        elif credentials:
            try:
                response = opener.open(req)
                # We have a redirect URL - try again with authorization.
                url = response.url
            except HTTPError:
                # No redirect - just try again with authorization.
                pass
            except Exception as e:
                print('Error{0}: {1}'.format(type(e), str(e)))
                sys.exit(1)

            req = Request(url)
            req.add_header('Authorization', 'Basic {0}'.format(credentials))

        try:
            response = opener.open(req)
        except HTTPError as e:
            err = 'HTTP error {0}, {1}'.format(e.code, e.reason)
            if 'Unauthorized' in e.reason:
                if token:
                    err += ': Check your bearer token'
                else:
                    err += ': Check your username and password'
            print(err)
            sys.exit(1)
        except Exception as e:
            print('Error{0}: {1}'.format(type(e), str(e)))
            sys.exit(1)

        return response


    def cmr_download(urls, force=False, quiet=False):
        """Download files from list of urls."""
        if not urls:
            return

        url_count = len(urls)
        if not quiet:
            print('Downloading {0} files...'.format(url_count))
        credentials = None
        token = None

        for index, url in enumerate(urls, start=1):
            if not credentials and not token:
                p = urlparse(url)
                if p.scheme == 'https':
                    credentials, token = get_login_credentials()

            filename = url.split('/')[-1]
            if not quiet:
                print('{0}/{1}: {2}'.format(str(index).zfill(len(str(url_count))),
                                            url_count, filename))

            try:
                response = get_login_response(url, credentials, token)
                length = int(response.headers['content-length'])
                try:
                    if not force and length == os.path.getsize(filename):
                        if not quiet:
                            print('  File exists, skipping')
                        continue
                except OSError:
                    pass
                count = 0
                chunk_size = min(max(length, 1), 1024 * 1024)
                max_chunks = int(math.ceil(length / chunk_size))
                time_initial = time.time()
                with open(filename, 'wb') as out_file:
                    for data in cmr_read_in_chunks(response, chunk_size=chunk_size):
                        out_file.write(data)
                        if not quiet:
                            count = count + 1
                            time_elapsed = time.time() - time_initial
                            download_speed = get_speed(time_elapsed, count * chunk_size)
                            output_progress(count, max_chunks, status=download_speed)
                if not quiet:
                    print()
            except HTTPError as e:
                print('HTTP error {0}, {1}'.format(e.code, e.reason))
            except URLError as e:
                print('URL error: {0}'.format(e.reason))
            except IOError:
                raise


    def cmr_filter_urls(search_results):
        """Select only the desired data files from CMR response."""
        if 'feed' not in search_results or 'entry' not in search_results['feed']:
            return []

        entries = [e['links']
                   for e in search_results['feed']['entry']
                   if 'links' in e]
        # Flatten "entries" to a simple list of links
        links = list(itertools.chain(*entries))

        urls = []
        unique_filenames = set()
        for link in links:
            if 'href' not in link:
                # Exclude links with nothing to download
                continue
            if 'inherited' in link and link['inherited'] is True:
                # Why are we excluding these links?
                continue
            if 'rel' in link and 'data#' not in link['rel']:
                # Exclude links which are not classified by CMR as "data" or "metadata"
                continue

            if 'title' in link and 'opendap' in link['title'].lower():
                # Exclude OPeNDAP links--they are responsible for many duplicates
                # This is a hack; when the metadata is updated to properly identify
                # non-datapool links, we should be able to do this in a non-hack way
                continue

            filename = link['href'].split('/')[-1]
            if filename in unique_filenames:
                # Exclude links with duplicate filenames (they would overwrite)
                continue
            unique_filenames.add(filename)

            urls.append(link['href'])

        return urls


    def cmr_search(short_name, version, time_start, time_end,
                   bounding_box='', polygon='', filename_filter='', quiet=False):
        """Perform a scrolling CMR query for files matching input criteria."""
        cmr_query_url = build_cmr_query_url(short_name=short_name, version=version,
                                            time_start=time_start, time_end=time_end,
                                            bounding_box=bounding_box,
                                            polygon=polygon, filename_filter=filename_filter)
        if not quiet:
            print('Querying for data:\n\t{0}\n'.format(cmr_query_url))

        cmr_scroll_id = None
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

        urls = []
        hits = 0
        while True:
            req = Request(cmr_query_url)
            if cmr_scroll_id:
                req.add_header('cmr-scroll-id', cmr_scroll_id)
            try:
                response = urlopen(req, context=ctx)
            except Exception as e:
                print('Error: ' + str(e))
                sys.exit(1)
            if not cmr_scroll_id:
                # Python 2 and 3 have different case for the http headers
                headers = {k.lower(): v for k, v in dict(response.info()).items()}
                cmr_scroll_id = headers['cmr-scroll-id']
                hits = int(headers['cmr-hits'])
                if not quiet:
                    if hits > 0:
                        print('Found {0} matches.'.format(hits))
                    else:
                        print('Found no matches.')
            search_page = response.read()
            search_page = json.loads(search_page.decode('utf-8'))
            url_scroll_results = cmr_filter_urls(search_page)
            if not url_scroll_results:
                break
            if not quiet and hits > CMR_PAGE_SIZE:
                print('.', end='')
                sys.stdout.flush()
            urls += url_scroll_results

        if not quiet and hits > CMR_PAGE_SIZE:
            print()
        return urls


    def main(argv=None):
        global short_name, version, time_start, time_end, bounding_box, \
            polygon, filename_filter, url_list

        if argv is None:
            argv = sys.argv[1:]

        force = False
        quiet = False
        usage = 'usage: nsidc-download_***.py [--help, -h] [--force, -f] [--quiet, -q]'

        try:
            opts, args = getopt.getopt(argv, 'hfq', ['help', 'force', 'quiet'])
            for opt, _arg in opts:
                if opt in ('-f', '--force'):
                    force = True
                elif opt in ('-q', '--quiet'):
                    quiet = True
                elif opt in ('-h', '--help'):
                    print(usage)
                    sys.exit(0)
        except getopt.GetoptError as e:
            print(e.args[0])
            print(usage)
            sys.exit(1)

        # Supply some default search parameters, just for testing purposes.
        # These are only used if the parameters aren't filled in up above.
        if 'short_name' in short_name:
            short_name = 'ATL06'
            version = '003'
            time_start = '2018-10-14T00:00:00Z'
            time_end = '2021-01-08T21:48:13Z'
            bounding_box = ''
            polygon = ''
            filename_filter = '*ATL06_2020111121*'
            url_list = []

        try:
            if not url_list:
                url_list = cmr_search(short_name, version, time_start, time_end,
                                      bounding_box=bounding_box, polygon=polygon,
                                      filename_filter=filename_filter, quiet=quiet)

            cmr_download(url_list, force=force, quiet=quiet)
        except KeyboardInterrupt:
            quit()


    if __name__ == '__main__':
        main()
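    Since the downloads kept dying on me, here is the retry sketch mentioned above. It is my own addition, not part of the NSIDC script: save the script as nsidc_download.py (a filename I am assuming), import its search and download helpers, and rebuild the parameters for each measured point instead of relying on mutated globals. Note that the NSIDC helpers call sys.exit() on some network errors, so SystemExit is caught as well; storing your credentials in .netrc avoids being re-prompted on every retry.

    # retry_download.py -- hedged sketch; assumes the NSIDC script above was
    # saved as nsidc_download.py in the same folder.
    import time

    from nsidc_download import cmr_search, cmr_download

    # Hypothetical measured points: (lon, lat, window start, window end).
    points = [
        (-170.5, 62.3, '2020-01-01T18:30:00Z', '2020-01-01T19:00:00Z'),
        (-168.2, 63.1, '2020-01-02T20:00:00Z', '2020-01-02T20:30:00Z'),
    ]

    def download_with_retry(short_name, version, t0, t1, bbox,
                            retries=5, wait=30):
        """Re-run search + download until it succeeds or retries run out."""
        for attempt in range(1, retries + 1):
            try:
                urls = cmr_search(short_name, version, t0, t1,
                                  bounding_box=bbox)
                cmr_download(urls)  # files already fully downloaded are skipped
                return True
            except (Exception, SystemExit) as e:
                # Flaky network (or sys.exit inside the helpers): wait, retry.
                print('Attempt {0} failed: {1}'.format(attempt, e))
                time.sleep(wait)
        print('Giving up on box {0}'.format(bbox))
        return False

    for lon, lat, t0, t1 in points:
        d = 0.5  # half-width of the box in degrees -- an assumption, tune it
        bbox = '{0},{1},{2},{3}'.format(lon - d, lat - d, lon + d, lat + d)
        download_with_retry('MYD29', '61', t0, t1, bbox)

    Because cmr_download skips files whose size already matches the server's, re-running a failed point is cheap: only the incomplete granules are fetched again.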

    2. Batch processing MODIS data

    For this, the HEG tool is enough: choose Input File, select the second option, load one HDF file, adjust the various parameters below it, and finally choose Batch Run. HEG then processes all the MODIS files in that folder, and the output GeoTIFFs appear in the HEGOUT folder. One caveat: it seems to handle at most a little over 900 scenes, after which it simply stops running; I have not found out why (see the workaround sketch below).
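    If the limit really is per batch run (my assumption; I only observed the cutoff, not its cause), one workaround sketch is to split the HDF files into sub-folders of, say, 800 scenes each and point HEG's Batch Run at one sub-folder at a time. The paths and folder names below are hypothetical:

    # split_for_heg.py -- hedged sketch: split a big folder of MODIS HDFs into
    # chunks below the ~900-scene ceiling I ran into, so each HEG batch run
    # stays within it. Adjust the paths to your own layout.
    import os
    import shutil

    src = r'D:\modis\hdf_all'       # folder with all downloaded .hdf granules
    dst_root = r'D:\modis\batches'  # sub-folders batch_000, batch_001, ... go here
    chunk = 800                     # comfortably under the observed ~900 limit

    hdfs = sorted(f for f in os.listdir(src) if f.lower().endswith('.hdf'))
    for i in range(0, len(hdfs), chunk):
        batch_dir = os.path.join(dst_root, 'batch_{0:03d}'.format(i // chunk))
        os.makedirs(batch_dir, exist_ok=True)
        for name in hdfs[i:i + chunk]:
            shutil.move(os.path.join(src, name), os.path.join(batch_dir, name))
        print('{0}: {1} files'.format(batch_dir, len(hdfs[i:i + chunk])))

    After each run, collect the GeoTIFFs from HEGOUT before starting on the next batch folder.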

  • Original article: https://blog.csdn.net/XinemaChen/article/details/134277059