Source code for periodograms

import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from astropy.timeseries import LombScargle
from pickle_data import unpickle_data

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))

# --- Import relevant data
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
pickle_file_path = root_dir + "/datasets/cleaned_data_20240531.pickle"
X = unpickle_data(pickle_file_path)
X_pre, X_post, X_harps = X["ESPRESSO_pre"], X["ESPRESSO_post"], X["HARPS"]
n_pre, n_post, n_harps = len(X_pre["RV"]), len(X_post["RV"]), len(X_harps["RV"])

# --- Concatenate data
ESPRESSO_df = pd.concat([X_pre, X_post], ignore_index=True)
ESPRESSO_times = np.block([X_pre["Time"], X_post["Time"]])
HARPS_df = X_harps
HARPS_times = X_harps["Time"]
time = np.concatenate([ESPRESSO_times, HARPS_times])

# --- GLSP parameters
samples_per_peak = 3000
min_frequency = 0.002025227467386652
max_frequency = 0.5177264355163222

ESPRESSO_activity_indices = ["RV", "FWHM", "BIS", "Sindex", "NaD", "Halpha", "Contrast"]
HARPS_activity_indices = [
    "RV",
    "FWHM",
    "BIS",
    "Sindex",
    "NaD",
    "Halpha",
    "Hbeta",
    "Hgamma",
]


# --- GLSP functions
[docs]def convert_to_microhertz(frequency_in_days):
    """
    Convert frequency from days to microhertz.

    Parameters:
    -----------
        frequency_in_days (float): Frequency in days.

    Returns:
    --------
        float: Frequency in microhertz.
    """
    return frequency_in_days * 1e6 / 86400


[docs]def convert_to_days(frequency_in_microhertz):
    """
    Convert frequency from microhertz to days.

    Parameters:
    -----------
        frequency_in_microhertz (float): Frequency in microhertz.

    Returns:
    --------
        float: Frequency in days.
    """
    return frequency_in_microhertz * 86400 / 1e6


[docs]def frequency_to_period(frequency):
    """
    Convert frequency to period.

    Parameters:
    -----------
        frequency (float): Frequency in microhertz.

    Returns:
    --------
        float: Period in days.
    """
    return 1 / (frequency * 86400 * 1e-6)


[docs]def compute_periodogram(
    time,
    data,
    min_frequency=min_frequency,
    max_frequency=max_frequency,
    samples_per_peak=samples_per_peak,
    ESPRESSO=False,
):
    """
    Compute the Lomb-Scargle periodogram for given data and time arrays.

    Parameters:
    -----------
        time (array): Time data points.
        data (array): Data points corresponding to time.
        min_frequency (float): Minimum frequency to analyze.
        max_frequency (float): Maximum frequency to analyze.
        samples_per_peak (int): Number of samples per peak for the analysis.
        ESPRESSO (bool): Flag to indicate whether data is from ESPRESSO or not (default False).

    Returns:
    --------
        tuple: Tuple containing frequency array, power array, and FAP thresholds.
    """
    frequency, power = LombScargle(time, data).autopower(
        minimum_frequency=min_frequency,
        maximum_frequency=max_frequency,
        normalization="standard",
        samples_per_peak=samples_per_peak,
    )
    fap_levels = [0.001, 0.01, 0.1]
    fap_thresholds = LombScargle(time, data).false_alarm_level(fap_levels)
    return frequency, power, fap_thresholds


[docs]def plot_periodogram(
    time,
    data,
    activity_indices,
    true_periods,
    period_colors,
    min_frequency=min_frequency,
    max_frequency=max_frequency,
    samples_per_peak=samples_per_peak,
    ESPRESSO=False,
):
    """
    Generate periodogram plots for the given data.

    Parameters:
    -----------
        time (array): Time data points.
        data (DataFrame): Data containing the activity indices.
        activity_indices (list): List of indices to be plotted.
        true_periods (list): True periods for reference lines.
        period_colors (list): Colors for the reference lines.
        min_frequency (float): Minimum frequency to analyze.
        max_frequency (float): Maximum frequency to analyze.
        samples_per_peak (int): Number of samples per peak for the analysis.
        ESPRESSO (bool): Flag to indicate whether data is from ESPRESSO or not (default False).

    Raises:
    -------
        ValueError: If time or data arrays are empty.
        RuntimeError: If computation of periodograms fails.
    """
    if len(time) == 0 or len(data) == 0:
        raise ValueError("Time or data arrays are empty.")

    periodograms = {}

    try:
        for index in activity_indices:
            mask = ~np.isnan(time) & ~np.isnan(data[index])
            cleaned_time = time[mask]
            cleaned_data = data[index][mask]

            if len(cleaned_time) > 0 and len(cleaned_data) > 0:
                frequency, power, fap_thresholds = compute_periodogram(
                    cleaned_time,
                    cleaned_data,
                    min_frequency,
                    max_frequency,
                    samples_per_peak,
                    ESPRESSO,
                )
                periodograms[index] = (frequency, power)
            else:
                raise ValueError(f"Not enough valid data points for index {index}")

        observation_indicator = np.ones_like(time)
        window_frequency, window_power = LombScargle(
            time, observation_indicator
        ).autopower(
            minimum_frequency=min_frequency,
            maximum_frequency=max_frequency,
            normalization="standard",
            samples_per_peak=samples_per_peak,
        )
        periodograms["WF"] = (window_frequency, window_power)

    except Exception as e:
        raise RuntimeError(f"Error computing periodograms: {e}")

    # Plotting
    fig, axes = plt.subplots(len(periodograms), 1, figsize=(10, 20), sharex=True)

    for i, (ax, (key, (frequency, power))) in enumerate(
        zip(axes, periodograms.items())
    ):
        fap_01 = fap_thresholds[0]  # 0.1% FAP level
        fap_1 = fap_thresholds[1]  # 1% FAP level
        fap_10 = fap_thresholds[2]  # 10% FAP level
        frq = convert_to_microhertz(frequency)
        ax.plot(frq, power, color="black", label=key)
        ax.set_ylabel(key, fontsize=20)
        if i < len(axes) - 1:
            for peak, color in zip(true_periods, period_colors):
                ax.axvline(
                    x=convert_to_microhertz(1 / peak),
                    color=color,
                    linestyle="--",
                    linewidth=1.2,
                )

        if i < len(axes) - 1:
            ax.axhline(fap_01, color="black", linestyle="dotted", label="0.1% FAP")
            ax.axhline(fap_1, color="black", linestyle="dashdot", label="1% FAP")
            ax.axhline(fap_10, color="black", linestyle="dashed", label="10% FAP")

        if i == 0 and ESPRESSO:
            ax.text(
                -0.1,
                0.46,
                r"$P_{rot}$",
                color="black",
                verticalalignment="top",
                size=18,
            )
            ax.text(
                0.40,
                0.46,
                r"$\frac{P_{rot}}{2}$",
                color="black",
                verticalalignment="top",
                size=18,
            )
            ax.text(1.45, 0.45, r"$P_d$", color="b", verticalalignment="top", size=20)
            ax.text(3.05, 0.45, r"$P_c$", color="g", verticalalignment="top", size=20)
            ax.text(5.04, 0.45, r"$P_b$", color="r", verticalalignment="top", size=20)
            ax.text(6.35, 0.4, "0.1%", color="black", verticalalignment="top", size=20)
            ax.text(6.35, 0.33, "1%", color="black", verticalalignment="top", size=20)
            ax.text(6.35, 0.26, "10%", color="black", verticalalignment="top", size=20)

        if i == 0 and not ESPRESSO:
            ax.text(
                -0.1,
                0.245,
                r"$P_{rot}$",
                color="black",
                verticalalignment="top",
                size=18,
            )
            ax.text(
                0.35,
                0.245,
                r"$\frac{P_{rot}}{2}$",
                color="black",
                verticalalignment="top",
                size=18,
            )
            ax.text(1.45, 0.24, r"$P_d$", color="b", verticalalignment="top", size=20)
            ax.text(3.05, 0.24, r"$P_c$", color="g", verticalalignment="top", size=20)
            ax.text(5.04, 0.24, r"$P_b$", color="r", verticalalignment="top", size=20)
            ax.text(6.35, 0.18, "0.1%", color="black", verticalalignment="top", size=20)
            ax.text(6.35, 0.15, "1%", color="black", verticalalignment="top", size=20)
            ax.text(6.35, 0.12, "10%", color="black", verticalalignment="top", size=20)

    fig.supxlabel("Frequency [µHz]", fontsize=20)
    fig.supylabel("Normalised Power", fontsize=20)

    plt.tight_layout()
    plt.savefig("plots/{}_periodogram.png".format("ESPRESSO" if ESPRESSO else "HARPS"))
    print(
        f"Plot saved successfully to plots/{'ESPRESSO' if ESPRESSO else 'HARPS'}_periodogram.png"
    )


# --- Plot the periodogram
[docs]def run_periodogram_plot(filepath, ESPRESSO=True):
    """Run plotting for given instrument data."""
    # Load data
    X = unpickle_data(filepath)
    X_pre, X_post, X_harps = X["ESPRESSO_pre"], X["ESPRESSO_post"], X["HARPS"]

    if ESPRESSO:
        time = np.concatenate([X_pre["Time"], X_post["Time"]])
        data_df = pd.concat([X_pre, X_post], ignore_index=True)
        activity_indices = ["RV", "FWHM", "BIS", "Sindex", "NaD", "Halpha", "Contrast"]
    else:
        time = X_harps["Time"]
        data_df = X_harps
        activity_indices = ["RV", "Sindex", "NaD", "Halpha", "Hbeta", "Hgamma"]

    # Define periodogram parameters
    true_periods = [2.2531136, 3.6906777, 7.4507245, 78.22, 78.22 / 2]
    period_colors = ["red", "green", "blue", "black", "black"]
    min_frequency = 0.002025227467386652
    max_frequency = 0.5177264355163222
    samples_per_peak = 3000

    # Plot the periodograms
    plot_periodogram(
        time,
        data_df,
        activity_indices,
        true_periods,
        period_colors,
        min_frequency,
        max_frequency,
        samples_per_peak,
        ESPRESSO=ESPRESSO,
    )