您现在的位置是:首页 >技术教程 >windows & Linux :python 脚本 下载日本葵花8号卫星数据L1级产品,自定义分辨率、时间网站首页技术教程

windows & Linux :python 脚本 下载日本葵花8号卫星数据L1级产品,自定义分辨率、时间

简朴-ocean 2023-05-16 20:00:02
简介windows & Linux :python 脚本 下载日本葵花8号卫星数据L1级产品,自定义分辨率、时间

前言介绍

  • 近期需要用到日本葵花8号卫星数据,用于相关研究,而通过官方提供的下载方法,难以针对性的下载所需要的数据类型,因此这里编写了一个针对葵花8号卫星L1级数据产品的脚本下载,主要实现两个功能:
  • 1、自定义时间下载
  • 2、选择任意的分辨率下载
    在这里插入图片描述

完整代码,下载5km分辨率,30min一次的指定时间内的数据

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
# ================================================================================================
# Author: Jianpu | Affiliation: Hohai
# Email : 211311040008@hhu.edu.cn
# Last modified: 2023-04-04 12:28:06
# Filename: download_kuihua8_L1.py
# Description: 1.日本葵花8号卫星,L1级数据产品下载脚本
#              2.需要提前下载好对于葵花8卫星产品下载的安装包: lb_toolkits.tools
#              3.在官网注册获取下载的用户名和密码以及链接网址
#              4. 可以选择两个分辨率进行下载:5km和2km
#			   5. 可以选择下载的频率:10min一次或者30min一次
# =================================================================================================

"""
import os
import sys
import datetime
import time
from lb_toolkits.tools import ftppro


    
class downloadH8(object):
    """Search for and download Himawari-8 (H8) L1 NetCDF files over FTP.

    All remote access goes through an ``ftppro`` client (imported from
    ``lb_toolkits.tools``); only its ``listdir`` and ``downloadFile`` methods
    are used by this class.
    """

    def __init__(self, username, password):
        """Open the FTP session with the given credentials.

        ``FTPHOST`` is looked up in module scope when this runs, so it must be
        defined before the first instance is created.
        """
        self.ftp = ftppro(FTPHOST, username, password)

    @staticmethod
    def _utc_stamp():
        """Return the current UTC time formatted for the progress messages."""
        return datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

    def search_ahi8_l1_netcdf(self, starttime, endtime=None, pattern=None, skip=False):
        '''
        Collect the remote paths of H8 L1 NetCDF files inside a time window.

        Parameters
        ----------
        starttime : datetime
            Start of the requested time window (inclusive).
        endtime : datetime, optional
            End of the requested time window (inclusive). Defaults to
            ``starttime``.
        pattern : list or str, optional
            Substring(s) a file name must contain, see ``GetFileList``.
        skip : bool, optional
            Accepted for interface compatibility; not used by the search.

        Returns
        -------
        list
            Remote paths of all matching files, one day directory after
            another.
        '''
        if endtime is None:
            endtime = starttime

        downfilelist = []

        # Walk calendar days instead of adding 24 h to ``starttime``: otherwise
        # the last day's directory is skipped whenever ``starttime`` has a later
        # time of day than ``endtime``.
        nowdate = starttime.date()
        lastdate = endtime.date()
        one_day = datetime.timedelta(days=1)
        while nowdate <= lastdate:
            # Remote paths always use '/', whatever the local OS separator is.
            sourceRoot = '/'.join(('/jma/netcdf', nowdate.strftime("%Y%m"), nowdate.strftime("%d")))

            filelist = self.GetFileList(starttime, endtime, sourceRoot, pattern)
            if filelist:
                downfilelist.extend(filelist)
            else:
                # Report the day that was just searched, not the following one.
                print('未匹配当前时间【%s】的文件' %(nowdate.strftime('%Y-%m-%d')))

            nowdate += one_day

        return downfilelist

    def GetFileList(self, starttime, endtime, srcpath, pattern=None):
        '''
        List the files of one remote directory that fall inside the window.

        Parameters
        ----------
        starttime, endtime : datetime
            Inclusive bounds for the time stamp encoded in the file name
            (third and fourth ``_``-separated fields, ``%Y%m%d`` and ``%H%M``).
        srcpath : str
            Remote directory to list.
        pattern : list, tuple or str, optional
            A string must occur in the file name; for a list/tuple every item
            must occur. ``None`` and any other type disable the filter.

        Returns
        -------
        list
            Matching remote paths (``srcpath`` joined with the file name),
            sorted by file name.
        '''
        downfiles = []

        srcpath = srcpath.replace('\\', '/')

        if isinstance(pattern, str):
            required = (pattern,)
        elif isinstance(pattern, (list, tuple)):
            required = tuple(pattern)
        else:
            required = ()

        for filename in sorted(self.ftp.listdir(srcpath)):
            namelist = filename.split('_')
            try:
                nowdate = datetime.datetime.strptime('%s %s' %(namelist[2], namelist[3]), '%Y%m%d %H%M')
            except (IndexError, ValueError):
                # Entry does not follow the expected name layout: ignore it
                # instead of aborting the whole search.
                continue

            if not (starttime <= nowdate <= endtime):
                continue

            if all(item in filename for item in required):
                # os.path.join uses '\\' on Windows; normalise for the FTP server.
                srcname = os.path.join(srcpath, filename).replace('\\', '/')
                downfiles.append(srcname)

        return downfiles

    def download(self, outdir, srcfile, blocksize=1*1024, skip=False):
        """Download one remote file or a list of remote files into ``outdir``.

        Parameters
        ----------
        outdir : str
            Local target directory; created when it does not exist.
        srcfile : list or str
            Remote path(s). Any other type is silently ignored.
        blocksize : int, optional
            Passed through to ``ftp.downloadFile``.
        skip : bool, optional
            Passed through to ``_download``; when true nothing is transferred.
        """
        if not os.path.exists(outdir):
            os.makedirs(outdir)
            print('成功创建路径:%s' %(outdir))

        if isinstance(srcfile, list):
            total = len(srcfile)
            for index, srcname in enumerate(srcfile):
                # ``count`` is a countdown: number of files still to process,
                # including the current one.
                self._download(outdir, srcname, blocksize=blocksize, skip=skip, count=total - index)

        elif isinstance(srcfile, str):
            self._download(outdir, srcfile, blocksize=blocksize, skip=skip)

    def _download(self, outdir, srcname, blocksize=1*1024, skip=False, count=1):
        """Download a single remote file unless it is already present locally.

        Returns ``srcname`` in every case (skipped, already present, success
        or failure); the outcome is only reported through printed messages.
        """
        print('='*100)
        basename = os.path.basename(srcname)
        dstname = os.path.join(outdir, basename)

        if skip:
            return srcname

        if os.path.isfile(dstname):
            print('文件已存在,跳过下载>>【%s】' %(dstname))
            return srcname

        stime = time.time()
        print(self._utc_stamp(),
              '开始下载文件【%d】: %s'%(count, srcname))

        if self.ftp.downloadFile(srcname, outdir, blocksize=blocksize):
            print(self._utc_stamp(),
                  '成功下载文件【%s】:%s' %(count, dstname))
        else:
            print(self._utc_stamp(),
                  '下载文件失败【%s】:%s' %(count, dstname))

        etime = time.time()
        print('下载文件共用%.2f秒' %(etime - stime))

        return srcname
 
    
def check_data_completeness(file_list, start_time, end_time,
                            interval_minutes=30, grid='02401_02401'):
    """Report which expected H8 files are absent from ``file_list``.

    Expected names are built as ``NC_H08_<YYYYmmdd_HHMM>_R21_FLDK.<grid>.nc``
    for every time slot between ``start_time`` and ``end_time`` (both
    inclusive). Slots are counted from midnight of ``start_time``'s day in
    steps of ``interval_minutes``, so a start time lying between two slots
    begins at the next slot.

    Parameters
    ----------
    file_list : list of str
        Paths or bare names of the files that were found; only the base name
        is compared.
    start_time, end_time : datetime
        Inclusive bounds of the period to check.
    interval_minutes : int, optional
        Spacing of the expected files: 30 gives 48 files per day, 10 gives
        144 files per day. Should divide a day evenly so slots stay aligned
        across midnight.
    grid : str, optional
        Grid-size part of the file name: ``'02401_02401'`` (5 km, the value
        the accompanying download script searches for) or ``'06001_06001'``
        (2 km).

    Returns
    -------
    list of str
        Missing file names in chronological order; empty when nothing is
        missing.

    Raises
    ------
    ValueError
        If ``interval_minutes`` is not positive.
    """
    if interval_minutes <= 0:
        raise ValueError('interval_minutes must be positive, got %r' % (interval_minutes,))

    step = datetime.timedelta(minutes=interval_minutes)
    day_start = start_time.replace(hour=0, minute=0, second=0, microsecond=0)
    # Ceiling division: index of the first slot that is not before start_time.
    first_slot = -((day_start - start_time) // step)

    actual_file_names = {os.path.basename(file_path) for file_path in file_list}

    missing_file_names = []
    file_time = day_start + first_slot * step
    while file_time <= end_time:
        file_name = "NC_H08_%s_R21_FLDK.%s.nc" % (file_time.strftime("%Y%m%d_%H%M"), grid)
        if file_name not in actual_file_names:
            missing_file_names.append(file_name)
        file_time += step

    if not missing_file_names:
        print("已经下载了全部数据。")
    else:
        print("有 %d 个数据文件缺失。" % len(missing_file_names))
        for missing_file_name in missing_file_names:
            print("缺失文件:%s" % missing_file_name)

    return missing_file_names

    
  


from tqdm import tqdm

# FTP host name; downloadH8.__init__ reads it from module scope.
FTPHOST = 'ftp.ptree.jaxa.jp'

# Local directory that receives the downloaded files.
out_dir = '/media/DATA/kuihua8_5km/'

# Create the downloader; replace 'xxx' with the registered user name and password.
h8_downloader = downloadH8('xxx', 'xxx')

# Search the files of the requested period (R21 product on the 02401 x 02401 grid).
start_time = datetime.datetime(2017, 1, 1)
end_time = datetime.datetime(2017, 1, 31, 23, 59, 59)
file_list = h8_downloader.search_ahi8_l1_netcdf(start_time, end_time, pattern=['R21', '02401_02401'])


# Keep only the files stamped on the hour or half hour (one every 30 minutes).
# The minutes are read from the HHMM field of the file name rather than from a
# fixed character offset, so the filter does not depend on the directory length.
selected_files = []
for file in file_list:
    name_fields = os.path.basename(file).split('_')
    if not file.endswith(".nc") or len(name_fields) < 4:
        continue
    if name_fields[3][2:4] in ("00", "30"):
        selected_files.append(file)

# Show the selected remote paths.
print(selected_files)


check_data_completeness(selected_files, start_time, end_time)

for file in tqdm(selected_files):
    try:
        h8_downloader.download(out_dir, file)
    except ValueError as e:
        # NOTE(review): presumably raised for a broken/partial transfer - confirm
        # against lb_toolkits. Drop the local copy (if any) and try once more.
        print(str(e))
        partial_file = os.path.join(out_dir, os.path.basename(file))
        if os.path.isfile(partial_file):
            os.remove(partial_file)
        h8_downloader.download(out_dir, file)


脚本主要完成如下功能:

  • 创建一个downloadH8类,用于FTP文件下载相关操作;
  • 定义了search_ahi8_l1_netcdf函数,用于搜索指定时间范围内的葵花8号卫星L1 NetCDF数据文件,返回下载的文件列表;
  • 定义了GetFileList函数,用于获取FTP服务器上指定路径下的文件列表,并根据输入时间和匹配参数筛选符合条件的文件进行返回;

搜寻数据和获取服务器文件的脚本如下:

 def search_ahi8_l1_netcdf(self, starttime, endtime=None, pattern=None, skip=False):
        '''
        Collect the remote paths of H8 L1 NetCDF files inside a time window.

        Parameters
        ----------
        starttime : datetime
            Start of the requested time window (inclusive).
        endtime : datetime, optional
            End of the requested time window (inclusive). Defaults to
            ``starttime``.
        pattern : list or str, optional
            Substring(s) a file name must contain; handed to ``GetFileList``.
        skip : bool, optional
            Accepted for interface compatibility; not used by the search.

        Returns
        -------
        list
            Remote paths of all matching files, one day directory after
            another.
        '''
        if endtime is None:
            endtime = starttime

        downfilelist = []

        # Walk calendar days instead of adding 24 h to ``starttime``: otherwise
        # the last day's directory is skipped whenever ``starttime`` has a later
        # time of day than ``endtime``.
        nowdate = starttime.date()
        lastdate = endtime.date()
        one_day = datetime.timedelta(days=1)
        while nowdate <= lastdate:
            # Remote paths always use '/', whatever the local OS separator is.
            sourceRoot = '/'.join(('/jma/netcdf', nowdate.strftime("%Y%m"), nowdate.strftime("%d")))

            filelist = self.GetFileList(starttime, endtime, sourceRoot, pattern)
            if filelist:
                downfilelist.extend(filelist)
            else:
                # Report the day that was just searched, not the following one.
                print('未匹配当前时间【%s】的文件' %(nowdate.strftime('%Y-%m-%d')))

            nowdate += one_day

        return downfilelist
    def GetFileList(self, starttime, endtime, srcpath, pattern=None):
        '''
        List the files of one remote directory that fall inside the window.

        Parameters
        ----------
        starttime, endtime : datetime
            Inclusive bounds for the time stamp encoded in the file name
            (third and fourth ``_``-separated fields, ``%Y%m%d`` and ``%H%M``).
        srcpath : str
            Remote directory to list.
        pattern : list, tuple or str, optional
            A string must occur in the file name; for a list/tuple every item
            must occur. ``None`` and any other type disable the filter.

        Returns
        -------
        list
            Matching remote paths (``srcpath`` joined with the file name),
            sorted by file name.
        '''
        downfiles = []

        srcpath = srcpath.replace('\\', '/')

        if isinstance(pattern, str):
            required = (pattern,)
        elif isinstance(pattern, (list, tuple)):
            required = tuple(pattern)
        else:
            required = ()

        for filename in sorted(self.ftp.listdir(srcpath)):
            namelist = filename.split('_')
            try:
                nowdate = datetime.datetime.strptime('%s %s' %(namelist[2], namelist[3]), '%Y%m%d %H%M')
            except (IndexError, ValueError):
                # Entry does not follow the expected name layout: ignore it
                # instead of aborting the whole search.
                continue

            if not (starttime <= nowdate <= endtime):
                continue

            if all(item in filename for item in required):
                # os.path.join uses '\\' on Windows; normalise for the FTP server.
                srcname = os.path.join(srcpath, filename).replace('\\', '/')
                downfiles.append(srcname)

        return downfiles
  • 定义了download和_download函数,用于将符合条件的文件下载到本地指定目录;
 def download(self, outdir, srcfile, blocksize=1*1024, skip=False):
        """Fetch one remote file, or each file of a list, into ``outdir``.

        ``outdir`` is created when missing. ``srcfile`` may be a single remote
        path (str) or a list of them; any other type is ignored. For a list,
        ``_download`` receives a countdown in ``count``: the number of files
        still to process, including the current one.
        """
        target_missing = not os.path.exists(outdir)
        if target_missing:
            os.makedirs(outdir)
            print('成功创建路径:%s' % (outdir,))

        if isinstance(srcfile, str):
            self._download(outdir, srcfile, blocksize=blocksize, skip=skip)
            return

        if isinstance(srcfile, list):
            total = len(srcfile)
            for position, remote_path in enumerate(srcfile):
                self._download(outdir, remote_path,
                               blocksize=blocksize, skip=skip,
                               count=total - position)

    def _download(self, outdir, srcname, blocksize=1*1024, skip=False, count=1):

        print('='*100)
        basename = os.path.basename(srcname)
        dstname = os.path.join(outdir, basename)

        if skip :
            return srcname

        if os.path.isfile(dstname) :
            print('文件已存在,跳过下载>>【%s】' %(dstname))
            return srcname

        stime = time.time()
        print(datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
              '开始下载文件【%d】: %s'%(count, srcname))

        if self.ftp.downloadFile(srcname, outdir, blocksize=blocksize):
            print(datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
                  '成功下载文件【%s】:%s' %(count, dstname))
        else:
            print(datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
                  '下载文件失败【%s】:%s' %(count, dstname))

        etime = time.time()
        print('下载文件共用%.2f秒' %(etime - stime))

        return srcname

此外,对于_download函数,还可以进一步修改,判断如果文件没有下载成功,则重新下载,这里也给出我改的一个函数:

    def _download(self, outdir, srcname, blocksize=1*1024, skip=False, count=1):
        print('=' * 100)
        basename = os.path.basename(srcname)
        dstname = os.path.join(outdir, basename)
    
        if skip:
            return srcname
    
        if os.path.isfile(dstname):
            print('文件已存在,跳过下载>>【%s】' % (dstname))
            return srcname
    
        stime = time.time()
        print(datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
              '开始下载文件【%d】: %s' % (count, srcname))
    
        while True:
            if self.ftp.downloadFile(srcname, outdir, blocksize=blocksize):
                print(datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
                      '成功下载文件【%s】:%s' % (count, dstname))
                break
            else:
                print(datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
                      '下载文件失败, 正在重新下载【%s】:%s' % (count, dstname))
        
        etime = time.time()
        print('下载文件共用%.2f秒' % (etime - stime))
    
        return srcname
  • 定义了check_data_completeness函数,用于检查是否存在数据文件缺失;

对于5km分辨率、10min一次的数据:

def check_data_completeness(file_list, start_time, end_time,
                            interval_minutes=10, grid='02401_02401'):
    """Report which expected H8 files are absent from ``file_list``.

    This variant defaults to the 5 km grid (``02401_02401``) with one file
    every 10 minutes (144 per day). Expected names are built as
    ``NC_H08_<YYYYmmdd_HHMM>_R21_FLDK.<grid>.nc`` for every time slot between
    ``start_time`` and ``end_time`` (both inclusive). Slots are counted from
    midnight of ``start_time``'s day in steps of ``interval_minutes``, so a
    start time lying between two slots begins at the next slot.

    Parameters
    ----------
    file_list : list of str
        Paths or bare names of the files that were found; only the base name
        is compared.
    start_time, end_time : datetime
        Inclusive bounds of the period to check.
    interval_minutes : int, optional
        Spacing of the expected files. Should divide a day evenly so slots
        stay aligned across midnight.
    grid : str, optional
        Grid-size part of the file name.

    Returns
    -------
    list of str
        Missing file names in chronological order; empty when nothing is
        missing.

    Raises
    ------
    ValueError
        If ``interval_minutes`` is not positive.
    """
    if interval_minutes <= 0:
        raise ValueError('interval_minutes must be positive, got %r' % (interval_minutes,))

    step = datetime.timedelta(minutes=interval_minutes)
    day_start = start_time.replace(hour=0, minute=0, second=0, microsecond=0)
    # Ceiling division: index of the first slot that is not before start_time.
    first_slot = -((day_start - start_time) // step)

    actual_file_names = {os.path.basename(file_path) for file_path in file_list}

    missing_file_names = []
    file_time = day_start + first_slot * step
    while file_time <= end_time:
        file_name = "NC_H08_%s_R21_FLDK.%s.nc" % (file_time.strftime("%Y%m%d_%H%M"), grid)
        if file_name not in actual_file_names:
            missing_file_names.append(file_name)
        file_time += step

    if not missing_file_names:
        print("已经下载了全部数据。")
    else:
        print("有 %d 个数据文件缺失。" % len(missing_file_names))
        for missing_file_name in missing_file_names:
            print("缺失文件:%s" % missing_file_name)

    return missing_file_names

对于2km分辨率、30min一次的数据:

def check_data_completeness(file_list, start_time, end_time,
                            interval_minutes=30, grid='06001_06001'):
    """Report which expected H8 files are absent from ``file_list``.

    This variant defaults to the 2 km grid (``06001_06001``) with one file
    every 30 minutes (48 per day; use 10 for 144 per day). Expected names are
    built as ``NC_H08_<YYYYmmdd_HHMM>_R21_FLDK.<grid>.nc`` for every time slot
    between ``start_time`` and ``end_time`` (both inclusive). Slots are
    counted from midnight of ``start_time``'s day in steps of
    ``interval_minutes``, so a start time lying between two slots begins at
    the next slot.

    Parameters
    ----------
    file_list : list of str
        Paths or bare names of the files that were found; only the base name
        is compared.
    start_time, end_time : datetime
        Inclusive bounds of the period to check.
    interval_minutes : int, optional
        Spacing of the expected files. Should divide a day evenly so slots
        stay aligned across midnight.
    grid : str, optional
        Grid-size part of the file name.

    Returns
    -------
    list of str
        Missing file names in chronological order; empty when nothing is
        missing.

    Raises
    ------
    ValueError
        If ``interval_minutes`` is not positive.
    """
    if interval_minutes <= 0:
        raise ValueError('interval_minutes must be positive, got %r' % (interval_minutes,))

    step = datetime.timedelta(minutes=interval_minutes)
    day_start = start_time.replace(hour=0, minute=0, second=0, microsecond=0)
    # Ceiling division: index of the first slot that is not before start_time.
    first_slot = -((day_start - start_time) // step)

    actual_file_names = {os.path.basename(file_path) for file_path in file_list}

    missing_file_names = []
    file_time = day_start + first_slot * step
    while file_time <= end_time:
        file_name = "NC_H08_%s_R21_FLDK.%s.nc" % (file_time.strftime("%Y%m%d_%H%M"), grid)
        if file_name not in actual_file_names:
            missing_file_names.append(file_name)
        file_time += step

    if not missing_file_names:
        print("已经下载了全部数据。")
    else:
        print("有 %d 个数据文件缺失。" % len(missing_file_names))
        for missing_file_name in missing_file_names:
            print("缺失文件:%s" % missing_file_name)

    return missing_file_names
  • 具体下载方法:
    在主程序中,先调用search_ahi8_l1_netcdf函数搜索符合条件的文件列表,再通过循环调用download函数将文件下载到本地。
    在具体的操作过程中,需要按照实际情况替换FTPHOST、username和password等相关参数,以保证正确的网络连接。同时,还需要根据需求修改脚本中的时间范围等参数,以便精确地搜索和下载文件。

下载示例

下面的示例下载了 2017.01.01-2017.01.31 之间、整个区域、5km分辨率、30min一次的所有数据;如何从文件名判断空间分辨率,见后面的“数据介绍”一节。


from tqdm import tqdm

# FTP host name; downloadH8.__init__ reads it from module scope.
FTPHOST = 'ftp.ptree.jaxa.jp'

# Local directory that receives the downloaded files.
out_dir = '/media/DATA/kuihua8_5km/'

# Create the downloader; replace 'xxx' with the registered user name and password.
h8_downloader = downloadH8('xxx', 'xxx')

# Search the files of the requested period (R21 product on the 02401 x 02401 grid).
start_time = datetime.datetime(2017, 1, 1)
end_time = datetime.datetime(2017, 1, 31, 23, 59, 59)
file_list = h8_downloader.search_ahi8_l1_netcdf(start_time, end_time, pattern=['R21', '02401_02401'])


# Keep only the files stamped on the hour or half hour (one every 30 minutes).
# The minutes are read from the HHMM field of the file name rather than from a
# fixed character offset, so the filter does not depend on the directory length.
selected_files = []
for file in file_list:
    name_fields = os.path.basename(file).split('_')
    if not file.endswith(".nc") or len(name_fields) < 4:
        continue
    if name_fields[3][2:4] in ("00", "30"):
        selected_files.append(file)

# Show the selected remote paths.
print(selected_files)


check_data_completeness(selected_files, start_time, end_time)

for file in tqdm(selected_files):
    try:
        h8_downloader.download(out_dir, file)
    except ValueError as e:
        # NOTE(review): presumably raised for a broken/partial transfer - confirm
        # against lb_toolkits. Drop the local copy (if any) and try once more.
        print(str(e))
        partial_file = os.path.join(out_dir, os.path.basename(file))
        if os.path.isfile(partial_file):
            os.remove(partial_file)
        h8_downloader.download(out_dir, file)

如果你想要下载5km,10min一次的,只需要把选取每30分钟的数据文件名那段循环删掉,直接从file_list中进行下载即可

def check_data_completeness(file_list, start_time, end_time,
                            interval_minutes=10, grid='02401_02401'):
    """Report which expected H8 files are absent from ``file_list``.

    This variant defaults to the 5 km grid (``02401_02401``) with one file
    every 10 minutes (144 per day; use 30 for 48 per day). Expected names are
    built as ``NC_H08_<YYYYmmdd_HHMM>_R21_FLDK.<grid>.nc`` for every time slot
    between ``start_time`` and ``end_time`` (both inclusive). Slots are
    counted from midnight of ``start_time``'s day in steps of
    ``interval_minutes``, so a start time lying between two slots begins at
    the next slot.

    Parameters
    ----------
    file_list : list of str
        Paths or bare names of the files that were found; only the base name
        is compared.
    start_time, end_time : datetime
        Inclusive bounds of the period to check.
    interval_minutes : int, optional
        Spacing of the expected files. Should divide a day evenly so slots
        stay aligned across midnight.
    grid : str, optional
        Grid-size part of the file name.

    Returns
    -------
    list of str
        Missing file names in chronological order; empty when nothing is
        missing.

    Raises
    ------
    ValueError
        If ``interval_minutes`` is not positive.
    """
    if interval_minutes <= 0:
        raise ValueError('interval_minutes must be positive, got %r' % (interval_minutes,))

    step = datetime.timedelta(minutes=interval_minutes)
    day_start = start_time.replace(hour=0, minute=0, second=0, microsecond=0)
    # Ceiling division: index of the first slot that is not before start_time.
    first_slot = -((day_start - start_time) // step)

    actual_file_names = {os.path.basename(file_path) for file_path in file_list}

    missing_file_names = []
    file_time = day_start + first_slot * step
    while file_time <= end_time:
        file_name = "NC_H08_%s_R21_FLDK.%s.nc" % (file_time.strftime("%Y%m%d_%H%M"), grid)
        if file_name not in actual_file_names:
            missing_file_names.append(file_name)
        file_time += step

    if not missing_file_names:
        print("已经下载了全部数据。")
    else:
        print("有 %d 个数据文件缺失。" % len(missing_file_names))
        for missing_file_name in missing_file_names:
            print("缺失文件:%s" % missing_file_name)

    return missing_file_names
from tqdm import tqdm

# FTP host name; downloadH8.__init__ reads it from module scope.
FTPHOST = 'ftp.ptree.jaxa.jp'

# Local directory that receives the downloaded files.
out_dir = '/media/DATA/kuihua8_5km/'

# Create the downloader; replace 'xxx' with the registered user name and password.
h8_downloader = downloadH8('xxx', 'xxx')

# Search the files of the requested period (R21 product on the 02401 x 02401 grid).
start_time = datetime.datetime(2017, 1, 1)
end_time = datetime.datetime(2017, 1, 31, 23, 59, 59)
file_list = h8_downloader.search_ahi8_l1_netcdf(start_time, end_time, pattern=['R21', '02401_02401'])


# Show the remote paths that were found (every 10-minute file is kept).
print(file_list)


# This variant has no 30-minute selection step, so the check must run on
# file_list; selected_files is not defined here.
check_data_completeness(file_list, start_time, end_time)

for file in tqdm(file_list):
    try:
        h8_downloader.download(out_dir, file)
    except ValueError as e:
        # NOTE(review): presumably raised for a broken/partial transfer - confirm
        # against lb_toolkits. Drop the local copy (if any) and try once more.
        print(str(e))
        partial_file = os.path.join(out_dir, os.path.basename(file))
        if os.path.isfile(partial_file):
            os.remove(partial_file)
        h8_downloader.download(out_dir, file)


数据介绍

这里,对于数据名称做简单介绍:

NC_H08_20170101_0100_R21_FLDK.06001_06001.nc

NC_H08_20170101_0030_R21_FLDK.02401_02401.nc

 NC_Hnn_YYYYMMDD_hhmm_Rbb_FLDK.xxxxx_yyyyy.nc

一般下载下来的数据名称如上所示:

NC_H08 : nn=08代表葵花8号,nn=09表示葵花9号

YYYYMMDD: 年月日
hhmm: 小时分钟
Rbb:Rbb = R21,表示的是整个区域,波段从 “01” 到 “16”;Rbb=R14,表示的是日本区域,波段为14

xxxxx: (“02401”: 5km resolution,
“06001”: 2km resolution)
yyyyy: (“02401”: 5km resolution,
“06001”: 2km resolution)

参考

https://mp.weixin.qq.com/s/b_BlfXYTTY0oAnb7U0tCOQ
https://mp.weixin.qq.com/s/EFT8D7ElvlcTBXQ_LkAbRw

风语者!平时喜欢研究各种技术,目前在从事后端开发工作,热爱生活、热爱工作。