Source code for pickle_data

import pickle
import numpy as np
import pandas as pd


def _within_tolerance(times, targets, tolerance):
    """Return a boolean array flagging each time within ``tolerance`` of any target."""
    values = np.asarray(times, dtype=float)
    # Broadcast to an (n_times, n_targets) grid of absolute differences.
    offsets = np.abs(values[:, np.newaxis] - np.asarray(targets, dtype=float))
    return (offsets < tolerance).any(axis=1)


def _clean_harps(harps_path):
    """Load the HARPS table, drop the excluded epochs and blank out ``---`` sentinels."""
    column_titles = [
        "Time",
        "RV",
        "e_RV",
        "Halpha",
        "e_Halpha",
        "Hbeta",
        "e_Hbeta",
        "Hgamma",
        "e_Hgamma",
        "NaD",
        "e_NaD",
        "Sindex",
        "e_Sindex",
        "FWHM",
        "BIS",
    ]
    # sep=r"\s+" is the supported spelling of the deprecated delim_whitespace=True.
    harps_df = pd.read_csv(harps_path, sep=r"\s+", names=column_titles)
    harps_df["Time"] += 2457000  # Adjust the time column to BJD

    # Filter out specific BJDs from HARPS data. The listed epochs carry six
    # decimals, so half of the last digit is used as the matching tolerance;
    # exact float equality after adding the offset would be brittle.
    excluded_bjds = [2458503.795048, 2458509.552019, 2458511.568314, 2458512.581045]
    excluded = _within_tolerance(harps_df["Time"], excluded_bjds, 5e-7)
    cleaned_harps_df = harps_df[~excluded].copy()

    # Replace invalid FWHM and BIS values with NaN. Results are assigned back
    # to the column instead of using a chained ``inplace=True`` call, which
    # does not update the DataFrame under pandas Copy-on-Write.
    cleaned_harps_df["FWHM"] = pd.to_numeric(
        cleaned_harps_df["FWHM"], errors="coerce"
    )
    # NOTE: only the sentinel is replaced here; the remaining BIS entries keep
    # whatever type read_csv gave them.
    cleaned_harps_df["BIS"] = cleaned_harps_df["BIS"].replace("---", np.nan)
    return cleaned_harps_df


def _clean_espresso(espresso_path):
    """Load the ESPRESSO table, drop the excluded epochs and split it on ``Inst``."""
    espresso_column_titles = [
        "Time",
        "RV",
        "e_RV",
        "FWHM",
        "e_FWHM",
        "BIS",
        "e_BIS",
        "Contrast",
        "e_Contrast",
        "Sindex",
        "e_Sindex",
        "Halpha",
        "e_Halpha",
        "NaD",
        "e_NaD",
        "BERV",
        "Inst",
    ]
    espresso_df = pd.read_csv(
        espresso_path, sep=r"\s+", names=espresso_column_titles
    )
    espresso_df["Time"] += 2400000  # Adjust the time column to BJD

    # Filter out specific BJDs from ESPRESSO data with a tolerance
    excluded_bjds = [2458645.496, 2458924.639, 2458924.645]
    tolerance = 1e-3
    excluded = _within_tolerance(espresso_df["Time"], excluded_bjds, tolerance)
    cleaned_espresso_df = espresso_df[~excluded].copy()

    # Split the data into pre and post fiber change
    cleaned_pre_df = cleaned_espresso_df[cleaned_espresso_df["Inst"] == "Pre"]
    cleaned_post_df = cleaned_espresso_df[cleaned_espresso_df["Inst"] == "Post"]
    return cleaned_pre_df, cleaned_post_df


def clean_and_pickle(espresso_path: str, harps_path: str, pickle_path: str) -> None:
    """
    Cleans the data from the ESPRESSO and HARPS instruments and pickles the results.

    Parameters
    ----------
    espresso_path : str
        File path for the ESPRESSO data file (whitespace-delimited, no header).
    harps_path : str
        File path for the HARPS data file (whitespace-delimited, no header).
    pickle_path : str
        Destination path to save the pickled data.

    Raises
    ------
    Exception
        Any error raised while loading or cleaning either data file is
        reported on stdout and then re-raised, so no pickle is written from
        incomplete data.

    Notes
    -----
    The function adjusts time values, filters out specific time points and
    handles missing values according to those specified in
    D. S. Demangeon et al. (2021).

    The pickle holds a dictionary with the keys ``"ESPRESSO_pre"``,
    ``"ESPRESSO_post"`` and ``"HARPS"``, each mapping to a DataFrame. A failure
    while writing the pickle itself is only printed, not raised.
    """
    # --- HARPS
    try:
        cleaned_harps_df = _clean_harps(harps_path)
    except Exception as e:
        print(f"Error processing HARPS data: {e}")
        # Re-raise: continuing would only fail later with an unrelated NameError.
        raise

    # --- ESPRESSO
    try:
        cleaned_pre_df, cleaned_post_df = _clean_espresso(espresso_path)
    except Exception as e:
        print(f"Error processing ESPRESSO data: {e}")
        raise

    # --- Organise data for pickling
    data_dict = {
        "ESPRESSO_pre": cleaned_pre_df,
        "ESPRESSO_post": cleaned_post_df,
        "HARPS": cleaned_harps_df,
    }

    try:
        # Save to pickle file
        with open(pickle_path, "wb") as handle:
            pickle.dump(data_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
        print(f"Data cleand and pickled at {pickle_path}")
    except Exception as e:
        print(f"Error saving data to pickle: {e}")
def unpickle_data(filepath: str):
    """
    Loads data from a pickle file.

    Parameters
    ----------
    filepath : str
        Path to the pickle file to be loaded.

    Returns
    -------
    data : object
        The object stored in the pickle file. For files written by
        ``clean_and_pickle`` this is a dictionary of DataFrames.

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    EOFError
        If the file is empty or improperly formatted, indicating end of file
        reached without any data.
    Exception
        For other issues that might occur during the loading process.

    Warnings
    --------
    Unpickling can execute arbitrary code. Only load files from a trusted
    source.
    """
    try:
        # Open the pickle file in binary read mode
        with open(filepath, "rb") as file:
            return pickle.load(file)
    except FileNotFoundError as err:
        # Chain explicitly so the original error stays attached as __cause__.
        raise FileNotFoundError(f"The file {filepath} was not found.") from err
    except EOFError as err:
        raise EOFError(
            f"No data found in file {filepath}. The file may be corrupted or empty."
        ) from err
    except Exception as err:
        raise Exception(
            f"An error occurred while loading the pickle file: {err}"
        ) from err