Source code for pyValEIA.io.download

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------
"""Download functions for supported data."""

import datetime as dt
import glob
import requests
import os
import zipfile

from pyValEIA import logger

# Base URL of the Swarm dissemination server, up to and including the "Level"
# path component; `download_and_unzip_swarm` appends the data level to it
swarm_url = "https://swarm-diss.eo.esa.int/?do=download&file=swarm%2FLevel"
def _swarm_name_parts(level, instrument, satellite, dataset):
    """Build the date-independent pieces of a Swarm product file name.

    Parameters
    ----------
    level : str
        Data level, either '1b' or '2daily'
    instrument : str
        Instrument acronym, e.g. 'EFI'
    satellite : str
        Satellite string, e.g. 'A'
    dataset : str
        Dataset acronym, e.g. 'LP'

    Returns
    -------
    stem : str
        File name up to, but not including, the date-time string
    zip_ext : str
        Extension used for the zip archive of this level

    Raises
    ------
    ValueError
        If an unknown level is supplied

    """
    if level == '1b':
        stem = ''.join(["SW_OPER_", instrument, satellite, "_", dataset,
                        "_1B_"])
        return stem, ".CDF.ZIP"

    if level == '2daily':
        stem = ''.join(["SW_OPER_", instrument, satellite, dataset, "_2F_"])
        return stem, ".ZIP"

    raise ValueError('unknown level: {:}'.format(level))


def download_and_unzip_swarm(ddate, satellite, out_dir, base_url=swarm_url,
                             level='1b', baseline='Latest_baselines',
                             instrument='EFI', dataset='LP', f_end='0602',
                             stime_str='000000', etime_str='235959',
                             num_days=0, remove=False, timeout=60):
    """Download daily Swarm files and unzip them into instrument-date dirs.

    Parameters
    ----------
    ddate : datetime object
        Date of the desired Swarm file
    satellite : str
        Satellite string 'A', 'B', or 'C'
    out_dir : str
        String specifying base directory for file output
    base_url : str
        Base URL where data can be found before Level specification
        (default=`swarm_url`)
    level : str
        Data level, only tested on '1b' (default='1b')
    baseline : str
        Desired baseline, have not tested 'Entire_mission_data'
        (default='Latest_baselines')
    instrument : str
        Desired instrument acronym, e.g. 'EFI' is the Electric Field
        Instrument (default='EFI')
    dataset : str
        Desired dataset acronym from instrument, e.g. 'LP' is Langmuir Probe
        (default='LP')
    f_end : str
        For different data products there are different numbers at the end
        The most common for EFIxLP is '0602' where '0602' represents the file
        version. Other data products also have a record type string.
        (default='0602')
    stime_str : str
        Starting time using the string format "HHMMSS". Most files start with
        "000000", but if the file is not the whole day it will be different.
        Check website if download fails (default="000000")
    etime_str : str
        Ending time using the string format "HHMMSS". Most files end with
        "235959", but if the file is not the whole day it will be different.
        Check website if download fails (default="235959")
    num_days : int
        Number of days after the starting date to be downloaded after the
        initial day (default=0)
    remove : bool
        If True, remove zip archive after unpacking (default=False)
    timeout : float, tuple, or NoneType
        Timeout in seconds handed to `requests.get`; None waits indefinitely
        (default=60)

    Raises
    ------
    ValueError
        If an unknown level is supplied

    Notes
    -----
    Different file options found at: https://swarm-diss.eo.esa.int/#

    File format information found at:
    https://swarmhandbook.earth.esa.int/article/product

    The year folder is taken from `ddate` alone, so when `num_days` carries
    the range past the end of the year, the later days are still stored
    under the starting year.

    A response with a status other than 200 is logged as a warning and the
    loop moves on to the next day; extraction is only attempted when the zip
    archive is present on disk. Exceptions raised by `requests.get` itself
    are not caught.

    """
    # Validate the level and build the name pieces before touching the disk
    # or the network, so that a bad level fails without side effects
    f_stem, zip_ext = _swarm_name_parts(level, instrument, satellite, dataset)

    # Adjust the URL based on if it is level 1b or level 2daily
    full_url = ''.join([base_url, level, "%2F", baseline, "%2F", instrument,
                        'x_' if level == '1b' else '%2F', dataset])

    # Create the output folder
    out_folder = os.path.join(out_dir, instrument,
                              '_'.join(['Sat', satellite]),
                              ddate.strftime('%Y'))

    # Make the path if it does not exist; `exist_ok` covers the case where
    # another process creates it between the check and the call
    if not os.path.exists(out_folder):
        logger.info(f'Making path {out_folder}')
        os.makedirs(out_folder, exist_ok=True)

    # Start at midnight of the first day and go for num_days
    current_date = dt.datetime(ddate.year, ddate.month, ddate.day)
    end_date = current_date + dt.timedelta(days=num_days)

    # Start with start date and go until end date is reached
    while current_date <= end_date:
        date_str = current_date.strftime("%Y%m%d")
        d_str = ''.join([date_str, "T", stime_str, "_", date_str, "T",
                         etime_str, "_", f_end])
        filename = ''.join([f_stem, d_str, zip_ext])

        # Set full file URL
        file_url = ''.join([full_url, "%2FSat_", satellite, "%2F", filename])

        # Set the full file path for the zip archive
        zip_path = os.path.join(out_folder, filename)

        # Set the output folder for unzipped data
        extract_folder = os.path.join(out_folder, date_str)

        # Find file if it already exists. The folder is escaped so that any
        # glob characters in `out_dir` are matched literally
        pattern = os.path.join(glob.escape(extract_folder),
                               ''.join([f_stem, d_str, "*.cdf"]))
        found_files = glob.glob(pattern)

        if found_files:
            logger.info(
                f"File already exists: {found_files[0]}.Skipping download.")
        else:
            # Download file from the file URL
            response = requests.get(file_url, timeout=timeout)

            if response.status_code == 200:
                with open(zip_path, 'wb') as fout:
                    fout.write(response.content)
                logger.info("Downloading: {:s}".format(filename))
            else:
                logger.warning(
                    f"Download failed with HTTP status "
                    f"{response.status_code}: {file_url}")

            # Only unpack when there is an archive on disk; opening a
            # missing file would raise an uncaught FileNotFoundError
            if os.path.isfile(zip_path):
                # Unzip file into date folder
                os.makedirs(extract_folder, exist_ok=True)

                try:
                    # Extract the zip archive
                    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                        zip_ref.extractall(extract_folder)
                    logger.info("Extracted to: {:s}".format(extract_folder))

                    # Remove zip archive, if desired
                    if remove:
                        os.remove(zip_path)
                except zipfile.BadZipFile:
                    logger.warning(
                        f"Failed filename {filename} does not exist")

        # Cycle to the next day
        current_date += dt.timedelta(days=1)