Source code for hustle_tools.run_pipeline

import os
import glob
import numpy as np

from hustle_tools.read_and_write_config import parse_config
from hustle_tools.read_and_write_config import write_config

from hustle_tools.plotting import plot_one_spectrum
from hustle_tools.plotting import plot_many_spectra
from hustle_tools.plotting import plot_spec_gif
from hustle_tools.plotting import plot_2d_spectra
from hustle_tools.plotting import plot_raw_whitelightcurve
from hustle_tools.plotting import quicklookup

from hustle_tools.stage_0 import collect_and_move_files
from hustle_tools.stage_0 import get_files_from_mast
from hustle_tools.stage_0 import locate_target
from hustle_tools.stage_0 import check_subarray

from hustle_tools.stage_1 import load_data_S1
from hustle_tools.stage_1 import save_data_S1
from hustle_tools.stage_1 import correct_hst_flags
from hustle_tools.stage_1 import uniform_value_bkg_subtraction
from hustle_tools.stage_1 import Pagul_bckg_subtraction
from hustle_tools.stage_1 import column_by_column_subtraction
from hustle_tools.stage_1 import track_bkgstars
from hustle_tools.stage_1 import track_0thOrder
from hustle_tools.stage_1 import free_iteration_rejection
from hustle_tools.stage_1 import fixed_iteration_rejection
from hustle_tools.stage_1 import laplacian_edge_detection
from hustle_tools.stage_1 import spatial_smoothing
from hustle_tools.stage_1 import refine_location

from hustle_tools.stage_2 import load_data_S2
from hustle_tools.stage_2 import save_data_S2
from hustle_tools.stage_2 import get_calibration_0th
from hustle_tools.stage_2 import get_trace_solution
from hustle_tools.stage_2 import sens_correct
from hustle_tools.stage_2 import time_and_relative_detrending_in_space
from hustle_tools.stage_2 import determine_ideal_halfwidth
from hustle_tools.stage_2 import standard_extraction
from hustle_tools.stage_2 import optimal_extraction
from hustle_tools.stage_2 import clean_spectra
from hustle_tools.stage_2 import align_spectra
from hustle_tools.stage_2 import align_profiles
from hustle_tools.stage_2 import remove_zeroth_order


[docs] def run_pipeline(config_files_dir, stages=(0, 1, 2)): """Wrapper for all Stages of the HUSTLE-tools pipeline. Args: config_files_dir (str): folder which contains the .hustle files needed to run the stages you want to run. stages (tuple, optional): the stages that you want to run. Defaults to (0, 1, 2). """ ######## Run Stage 0 ######## if 0 in stages: # read out the stage 0 config stage0_config = glob.glob(os.path.join(config_files_dir,'stage_0*'))[0] stage0_dict = parse_config(stage0_config) # run data download if stage0_dict['do_download']: get_files_from_mast(stage0_dict['programID'], stage0_dict['target_name'], stage0_dict['visit_number'], stage0_dict['toplevel_dir'], token=stage0_dict['token'], extensions=stage0_dict['extensions'], verbose=stage0_dict['verbose']) # collect and move files if stage0_dict['do_organize']: if not stage0_dict['filesfrom_dir']: stage0_dict['filesfrom_dir'] = stage0_dict['toplevel_dir'] # if the data weren't pre-downloaded, then they are here collect_and_move_files(stage0_dict['visit_number'], stage0_dict['filesfrom_dir'], stage0_dict['toplevel_dir'], stage0_dict['verbose']) # check if output directory exists, otherwise create output directory output_dir = os.path.join(stage0_dict['toplevel_dir'],'outputs') if not os.path.exists(output_dir): os.makedirs(output_dir) # locate target in direct image if stage0_dict['do_locate']: # check for direct image / spec image discrepancies xdiscs, ydiscs = check_subarray(os.path.join(stage0_dict['toplevel_dir'],'directimages/or01dr001_flt.fits'), sorted(glob.glob(os.path.join(os.path.join(stage0_dict['toplevel_dir'],'specimages'),'*flt.fits')))) source_x, source_y = locate_target(os.path.join(stage0_dict['toplevel_dir'],'directimages/or01dr001_flt.fits')) # modify config keyword stage0_dict['location'] = [source_x,source_y] # write config config_dir = os.path.join(output_dir,'stage0') if not os.path.exists(config_dir): os.makedirs(config_dir) write_config(stage0_dict, '', 0, config_dir) # create quicklook gif if stage0_dict['do_quicklook']: quicklookup(stage0_dict['toplevel_dir'], stage0_dict['traces_included'], stage0_dict['verbose'], stage0_dict['show_plots'], stage0_dict['save_plots'], config_dir) ####### Run Stage 1 ####### if 1 in stages: # read out the stage 1 config stage1_config = glob.glob(os.path.join(config_files_dir,'stage_1*'))[0] stage1_dict = parse_config(stage1_config) # read the 'location' keyword from the Stage 0 config try: stage0_output_config = os.path.join(stage1_dict['toplevel_dir'],'outputs/stage0/stage_0_.hustle') stage0_output_dict = parse_config(stage0_output_config) # and grab the location of the source if stage0_output_dict['location'] != None: if stage1_dict['verbose'] > 0: print("Stage 0 .hustle located, using 'location' parameter supplied by Stage 0 fitting process.") stage1_dict['location'] = stage0_output_dict['location'] except FileNotFoundError: if stage1_dict['verbose'] > 0: print("No Stage 0 .hustle located, using 'location' parameter supplied by Stage 1 .hustle instead.") # read data obs = load_data_S1(stage1_dict['toplevel_dir'], skip_first_fm = stage1_dict['skip_first_fm'], skip_first_or = stage1_dict['skip_first_or'], verbose = stage1_dict['verbose']) # supply obs with new attributes if you can if stage1_dict['location'] is not None: obs.attrs['target_posx'] = stage1_dict['location'][0] obs.attrs['target_posy'] = stage1_dict['location'][1] # create output directory output_dir = os.path.join(stage1_dict['toplevel_dir'],'outputs') stage_dir = os.path.join(output_dir,'stage1') run_dir = os.path.join(stage_dir,stage1_dict['output_run']) if not os.path.exists(run_dir): os.makedirs(run_dir) # hst flag corrections if stage1_dict['do_hst_flags']: obs = correct_hst_flags(obs, stage1_dict['hst_flags'], stage1_dict['hst_replace'], verbose=stage1_dict['verbose'], show_plots=stage1_dict['show_plots'], save_plots=stage1_dict['save_plots'], output_dir=run_dir) # temporal removal fixed iterations if stage1_dict['do_fixed_iter']: obs = fixed_iteration_rejection(obs, stage1_dict['fixed_sigmas'], stage1_dict['replacement'], verbose=stage1_dict['verbose'], show_plots=stage1_dict['show_plots'], save_plots=stage1_dict['save_plots'], output_dir=run_dir) # temporal removal free iterations if stage1_dict['do_free_iter']: obs = free_iteration_rejection(obs, threshold=stage1_dict['free_sigma'], verbose=stage1_dict['verbose'], show_plots=stage1_dict['show_plots'], save_plots=stage1_dict['save_plots'], output_dir=run_dir) # spatial removal by led if stage1_dict['do_led']: obs = laplacian_edge_detection(obs, sigma=stage1_dict['led_threshold'], factor=stage1_dict['led_factor'], n=stage1_dict['led_n'], build_fine_structure=stage1_dict['fine_structure'], contrast_factor=stage1_dict['contrast_factor'], verbose=stage1_dict['verbose'], show_plots=stage1_dict['show_plots'], save_plots=stage1_dict['save_plots'], output_dir=run_dir) # spatial removal by smoothing if stage1_dict['do_smooth']: obs = spatial_smoothing(obs, type=stage1_dict["smth_type"], kernel=stage1_dict["smth_kernel"], sigma=stage1_dict["smth_threshold"], bounds_set=stage1_dict["smth_bounds"], verbose=stage1_dict['verbose'], show_plots=stage1_dict['show_plots'], save_plots=stage1_dict['save_plots'], output_dir=run_dir) # background subtraction with a uniform bckg value if stage1_dict['do_uniform']: obs = uniform_value_bkg_subtraction(obs, fit=stage1_dict['fit'], bounds=stage1_dict['bounds'], hist_min=stage1_dict['hist_min'], hist_max=stage1_dict['hist_max'], hist_bins=stage1_dict['hist_bins'], verbose=stage1_dict['verbose'], show_plots=stage1_dict['show_plots'], save_plots=stage1_dict['save_plots'], output_dir=run_dir) # background subtraction with the Pagul et al. image scaled if stage1_dict['do_Pagul']: obs = Pagul_bckg_subtraction(obs, pagul_path=stage1_dict['path_to_Pagul'], masking_parameter=stage1_dict['mask_parameter'], smooth_fits=stage1_dict['smooth_fits'], smooth_parameter=stage1_dict['smoothing_param'], median_on_columns=stage1_dict['median_columns'], verbose=stage1_dict['verbose'], show_plots=stage1_dict['show_plots'], save_plots=stage1_dict['save_plots'], output_dir=run_dir) # background subtraction by columnal medians if stage1_dict['do_column']: obs = column_by_column_subtraction(obs, rows=stage1_dict['rows'], sigma=stage1_dict['col_sigma'], mask_trace=stage1_dict['mask_trace'], width=stage1_dict['dist_from_trace'], verbose=stage1_dict['verbose'], show_plots=stage1_dict['show_plots'], save_plots=stage1_dict['save_plots'], output_dir=run_dir) # refine location of the source in the direct image using COM fitting if stage1_dict['do_location']: refine_location(obs, verbose=stage1_dict['verbose'], show_plots=stage1_dict['show_plots'], save_plots=stage1_dict['save_plots'], output_dir=run_dir) # update the dict so that we can see this info in the output config stage1_dict['location'] = [obs.attrs['target_posx'], obs.attrs['target_posy']] # displacements by 0th order tracking obs["0th_order_pos"] = (("exp_time", "xy"), np.zeros((obs.exp_time.data.shape[0],2))) # placeholder in case you don't do this step if stage1_dict['do_0thtracking']: # FIX (Issue #39): Hard-coded guess values are used to shift from # direct image pos to spec image. Hardcoding is something that we # want to avoid, so we need to think of a better way. track_0thOrder(obs, guess=[100,150], verbose=stage1_dict['verbose'], show_plots=stage1_dict['show_plots'], save_plots=stage1_dict['save_plots'], output_dir=run_dir) # displacements by background stars obs["meanstar_disp"] = (("exp_time", "xy"), np.zeros((obs.exp_time.data.shape[0],2))) # placeholder in case you don't do this step if stage1_dict['do_bkg_stars']: track_bkgstars(obs, bkg_stars=stage1_dict['bkg_stars_loc'], window=stage1_dict['bkg_window'], verbose=stage1_dict['verbose'], show_plots=stage1_dict['show_plots'], save_plots=stage1_dict['save_plots'], output_dir=run_dir) # create quicklook gif if stage1_dict['do_quicklook']: if stage1_dict['include_hst_dq']: obs.data_quality.values += np.where(obs.hst_dq.values >= 1, 1, 0) obs.data_quality.values = np.where(obs.data_quality.values >= 1, 1, 0) quicklookup(obs, stage1_dict['traces_included'], stage1_dict['verbose'], stage1_dict['show_plots'], stage1_dict['save_plots'], run_dir) # save results if stage1_dict['do_save']: save_data_S1(obs, run_dir) # write config write_config(stage1_dict, stage1_dict['output_run'], 1, run_dir) ####### Run Stage 2 ####### if 2 in stages: # read out the stage 2 config stage2_config = glob.glob(os.path.join(config_files_dir,'stage_2*'))[0] stage2_dict = parse_config(stage2_config) # read data S2_data_path = os.path.join(stage2_dict['toplevel_dir'], os.path.join('outputs/stage1',stage2_dict['input_run'])) obs = load_data_S2(S2_data_path) # get the location from the obs.nc file stage2_dict['location'] = [obs.attrs['target_posx'], obs.attrs['target_posy']] # create output directory output_dir = os.path.join(stage2_dict['toplevel_dir'],'outputs') stage_dir = os.path.join(output_dir,'stage2') run_dir = os.path.join(stage_dir,stage2_dict['output_run']) if not os.path.exists(run_dir): os.makedirs(run_dir) if stage2_dict['correct_zero']: # calibrate 0th order x0th, y0th = get_calibration_0th(obs, source_pos=stage2_dict['location'], path_to_cal=stage2_dict['path_to_cal'], verbose=stage2_dict['verbose'], show_plots=stage2_dict['show_plots'], save_plots=stage2_dict['save_plots'], output_dir=run_dir) # 0th order removal remove_zeroth_order(obs, zero_pos = [x0th, y0th], rmin = 85, rmax = 500, rwidth = 3, fit_profile = True, verbose = stage2_dict['verbose'], show_plots = stage2_dict['show_plots'], save_plots = stage2_dict['save_plots'], output_dir = run_dir) # iterate over orders for i, order in enumerate(stage2_dict['traces_to_conf']): # configure trace trace_x, trace_y, wav, widths, fs = get_trace_solution(obs, order=order, source_pos=stage2_dict['location'], refine_calibration=stage2_dict['refine_fit'], path_to_cal=stage2_dict['path_to_cal'], verbose=stage2_dict['verbose'], show_plots=stage2_dict['show_plots'], save_plots=stage2_dict['save_plots'], output_dir=run_dir) # tardis clean if stage2_dict['do_tardis']: obs = time_and_relative_detrending_in_space(obs, trace_x, np.median(trace_y,axis=0), order, sigmas=stage2_dict['tardis_sigma'], flux_threshold=stage2_dict['flux_threshold'], windows=stage2_dict['tardis_window'], replacement=stage2_dict['tardis_replace'], verbose=stage2_dict['verbose'], show_plots=stage2_dict['show_plots'], save_plots=stage2_dict['save_plots'], output_dir=run_dir) # extract if stage2_dict['method'] == 'box': # determine ideal halfwidth if stage2_dict['determine_hw']: halfwidth = determine_ideal_halfwidth(obs, order, trace_x=trace_x, trace_y=trace_y, wavs=wav, indices=stage2_dict['indices'], verbose=stage2_dict['verbose'], show_plots=stage2_dict['show_plots'], save_plots=stage2_dict['save_plots'], output_dir=run_dir) if stage2_dict['verbose'] > 0: print("Optimized half-width for extraction of order {}: {} pixels".format(order,halfwidth)) else: halfwidth = stage2_dict['halfwidths_box'][i] # box extraction spec, spec_err = standard_extraction(obs, halfwidth=halfwidth, trace_x=trace_x, trace_y=trace_y, order=order, masks=stage2_dict['mask_objs'], verbose=stage2_dict['verbose'], show_plots=stage2_dict['show_plots'], save_plots=stage2_dict['save_plots'], output_dir=run_dir) elif stage2_dict['method'] == 'optimal': # optimum extraction spec, spec_err = optimal_extraction(obs, trace_x, trace_y, masks=stage2_dict['mask_objs'], width = stage2_dict['halfwidths_opt'][i], prof_type = stage2_dict['aperture_type'], show_plots=stage2_dict['show_plots'], save_plots=stage2_dict['save_plots'], output_dir=run_dir) # do sensitivity if stage2_dict['sens_correction']: spec, spec_err = sens_correct(spec, spec_err, wav, fs) # do alignment x_shifts, y_shifts = False, False if stage2_dict['align']: spec, spec_err, x_shifts = align_spectra(obs, spec, spec_err, order, trace_x=trace_x, wavelengths=wav, align=stage2_dict['apply_align'], verbose=stage2_dict['verbose'], show_plots=stage2_dict['show_plots'], save_plots=stage2_dict['save_plots'], output_dir=run_dir) y_shifts = align_profiles(obs, trace_x, trace_y, order, width = 25, verbose=stage2_dict['verbose'], show_plots=stage2_dict['show_plots'], save_plots=stage2_dict['save_plots'], output_dir=run_dir) # do clean spectra if stage2_dict['outlier_sigma']: spec = clean_spectra(spec, sigma=stage2_dict['outlier_sigma'], verbose=stage2_dict['verbose']) # do plotting if (stage2_dict['show_plots'] > 0 or stage2_dict['save_plots'] > 0): plot_one_spectrum(wav, np.median(spec[:, :],axis=0), order, show_plot=(stage2_dict['show_plots'] > 0), save_plot=(stage2_dict['save_plots'] > 0), filename='1Dspec-med_order{}'.format(order), output_dir=run_dir, ) plot_many_spectra(wav, spec, order, show_plot=(stage2_dict['show_plots'] > 0), save_plot=(stage2_dict['save_plots'] > 0), filename='1Dspec-all_order{}'.format(order), output_dir=run_dir, ) plot_2d_spectra(wav, spec, show_plot = (stage2_dict['show_plots'] > 0), save_plot = (stage2_dict['save_plots'] > 0), filename = '2Dspec_order{}'.format(order), output_dir = run_dir) plot_raw_whitelightcurve(obs.exp_time.data,spec, order=order, show_plot = (stage2_dict['show_plots'] > 0), save_plot = (stage2_dict['save_plots'] > 0), filename = 'rawwlc_order{}'.format(order), output_dir = run_dir) plot_spec_gif(wav,spec, order=order, show_plot=(stage2_dict['show_plots'] > 0), save_plot=(stage2_dict['save_plots'] > 0), filename='1Dspec_order{}'.format(order), output_dir=run_dir) # save order xarray save_data_S2(obs, spec, spec_err, trace_x, trace_y, widths, wav, x_shifts, y_shifts, order, output_dir=run_dir) # write config write_config(stage2_dict, stage2_dict['output_run'], 2, run_dir)