Sampling Galactic binaries§

In this tutorial, we will walk through the process of building a likelihood callable to feed into an MCMC sampler. We will see

  1. How to generate a mock data with FastGB,

  2. How to generate a frequency domain waveform in the format of ProjectedWaveform.

Some snippets of code are adapted from the LISA Data Generation and Analysis Workshop tutorial.

Dependencies§

  • numpy: the fundamental package for scientific computing with Python.

  • fastgb: a fast waveform generator for Galactic binaries in the LISA band.

  • LISA Orbits: a Python package to generate LISA orbits.

Let’s get started!§

A GB source is described by 8 parameters. We can use a NamedTuple to encapsulate them. This is not necessary, but it makes the code more readable and maintainable.

Hide code cell source

from typing import NamedTuple

import lovelyplots
import matplotlib.pyplot as plt
import numpy as np
import numpy.typing as npt

plt.style.use(["paper", "use_mathtext", "colors5"])
del lovelyplots

Param = float | np.floating | npt.NDArray[np.floating]


class GBSource(NamedTuple):
    """Encapsulator for GB source parameters."""

    amp: Param
    """Amplitude"""

    f0: Param
    """Frequency"""

    fdot: Param
    """Frequency derivative"""

    phi0: Param
    """Initial phase"""

    inc: Param
    """Inclination angle"""

    psi: Param
    """Polarisation angle"""

    lam: Param
    """Ecliptic longitude"""

    beta: Param
    """Ecliptic latitude"""

    def asdict(self):
        """Convert to dictionary."""
        return self._asdict()

    @classmethod
    def keys(cls):
        """Return the keys."""
        return cls._fields

To build the waveform, we can wrap the fast GB generator with a class.

from fastgb.fastgb import FastGB

import typed_lisa_toolkit as tlt


class GBGen:
    """Class to generate GB waveforms."""

    def __init__(self, fastgb: FastGB):
        self.fastgb = fastgb

    def __call__(self, params: GBSource):
        """Generate a GB waveform."""
        params_dict = params.asdict()
        # FastGB accepts the parameters in a fixed order
        fastgb_param_order = ["f0", "fdot", "amp", "beta", "lam", "psi", "inc", "phi0"]
        params_array = np.column_stack([params_dict[key] for key in fastgb_param_order])
        X, Y, Z, kmin = self.fastgb.get_fd_tdixyz(params_array, tdi2=False)
        A, E, T = tlt.shop.xyz2aet(X=X, Y=Y, Z=Z)
        # Get the frequencies
        df = 1 / self.fastgb.T
        f_min, f_max = df * kmin, df * (kmin + self.fastgb.N)
        freqs = np.linspace(f_min, f_max, self.fastgb.N, axis=-1, endpoint=False)

        for i in range(freqs.shape[0]):
            if i != 0 and not np.array_equal(freqs[0], freqs[i]):
                msg = "Frequencies are not the same across all sources."
                raise ValueError(msg)

        A, E, T, freqs = (
            A[:, None, None, None, :],
            E[:, None, None, None, :],
            T[:, None, None, None, :],
            tlt.linspace_from_array(freqs[0]),
        )

        # Encapsulate the result
        result = tlt.projected_waveform(
            {
                "A": tlt.frequency_series(freqs, A),
                "E": tlt.frequency_series(freqs, E),
                "T": tlt.frequency_series(freqs, T),
            },
        )
        return result

Let us consider one year of observation with LISA, with cadence of 5 seconds. Note that the observation time in data analysis must be an integer multiple of the cadence, so not exactly one year strictly speaking.

import lisaconstants  # this is a dependency of lisaorbits
import lisaorbits

T_obs, cadence = lisaconstants.ASTRONOMICAL_YEAR, 5
T_obs = np.rint(T_obs / cadence) * cadence

orbits = lisaorbits.EqualArmlengthOrbits()
gb_gen = GBGen(FastGB(orbits=orbits, T=T_obs, delta_t=cadence, N=256))

# Prepare a GB source to inject
inj_source = GBSource(
    amp=1.8326657820908912e-23,
    f0=0.006009098866294652,
    fdot=2.755088767708786e-16,
    phi0=2.504337144699295,
    inc=0.8694902600959342,
    psi=5.053288418125767,
    lam=5.17647019318475,
    beta=0.6865491022703035,
)

To generate the mock data, we need to insert the generated waveform into the frequency grid corresponding to the observation time and cadence.

data_frequencies = tlt.axis(np.fft.rfftfreq(int(np.rint(T_obs / cadence)), d=cadence))

inj_wav = gb_gen(inj_source)
inj_data = tlt.fsdata(inj_wav)
inj_data = inj_data.get_embedded((data_frequencies,))

We can visualize the waveform:

fig, axs = tlt.plot(inj_wav, plot_mode="semilogy", set_legend=False)
../_images/0c304aa48dc15a0fa27c49c007d477dca6c9acdc2365679d2360835eabc5a144.png

And also in the full frequency range:

fig, axs = tlt.plot(inj_data, set_legend=False)
../_images/4dda6d2465177fbc5b93fce3714f15c51989af0bbe8095fe4fe68764b8052a95.png

To build the likelihood callable, we can use FDWhittleLikelihood. The last piece we need is the noise power spectral density. We use the 7 parameter noise model from the Gee-Moo global-fit.

Hide code cell source

def parametric_noise_7p(pars, frqs, option="A", arm_len=2.5e9, *, log_amp=False):
    """Return noise psd for 7 parameter noise model."""
    psd_acc, psd_oms, amp, freq1, freq2, alpha, fknee = pars
    if log_amp:
        psd_acc = 10**psd_acc
        psd_oms = 10**psd_oms
        amp = 10**amp

    omega = 2.0 * np.pi * frqs

    psd_pm = (
        psd_acc
        * (1.0 + (0.4e-3 / frqs) ** 2)
        * (1.0 + (frqs / 8.0e-3) ** 4)
        / (lisaconstants.SPEED_OF_LIGHT * omega) ** 2
    )

    psd_op = (
        psd_oms
        * (1.0 + (2.0e-3 / frqs) ** 4)
        * (omega / lisaconstants.SPEED_OF_LIGHT) ** 2
    )

    x = omega * (arm_len / lisaconstants.SPEED_OF_LIGHT)

    if option in ["A", "E"]:
        psd_instr = (
            8.0
            * np.sin(x) ** 2
            * (
                2.0 * psd_pm * (3.0 + 2.0 * np.cos(x) + np.cos(2 * x))
                + psd_op * (2.0 + np.cos(x))
            )
        )

    elif option == "X":
        psd_instr = (
            16.0 * np.sin(x) ** 2 * (2.0 * (1.0 + np.cos(x) ** 2) * psd_pm + psd_op)
        )

    elif option == "T":
        psd_instr = (
            16.0 * psd_op * (1.0 - np.cos(x)) * np.sin(x) ** 2
            + 128.0 * psd_pm * np.sin(x) ** 2 * np.sin(0.5 * x) ** 4
        )

    else:
        msg = "Only channels A, E, T, X are available."
        raise NotImplementedError(msg)

    if option != "T":
        psd_gal = (
            amp
            * frqs ** (-7.0 / 3.0)
            * np.exp(-((frqs / freq1) ** alpha))
            * 0.5
            * (1.0 + np.tanh(-(frqs - fknee) / freq2))
        )
        psd_gal = 4.0 * (x * np.sin(x)) ** 2 * psd_gal

        if option in ["A", "E"]:
            psd_gal = 1.5 * psd_gal
    else:
        psd_gal = 0.0

    return psd_instr + psd_gal


class P7Noise:
    """7 parameter noise model."""

    def __init__(self, pars, *, log_amp=False):
        self.pars = pars
        self.log_amp = log_amp

    def psd(self, frequencies, option: str = "A"):
        """Return the parametric noise PSD."""
        if frequencies[0] == 0:
            frequencies[0] = 1e-10 * frequencies[1]  # avoid division by zero
        return parametric_noise_7p(
            self.pars,
            frequencies,
            option=option,
            log_amp=self.log_amp,
        )


# Set the noise parameters
noise_params = [
    5.55620285e-30,
    5.44814245e-23,
    1.44305853e-44,
    3.13562154e-03,
    1.88869275e-03,
    1.78907291e00,
    2.46721224e-03,
]

p7noise = P7Noise(noise_params, log_amp=False)

from typed_lisa_toolkit import make_sdm, noise_model

channel_names = ("A", "E", "T")
ipsd_diag = np.stack(
    [np.squeeze(1 / p7noise.psd(data_frequencies.ax, option=c)) for c in channel_names],
    axis=-1,
)
sdm = make_sdm(
    ipsd_diag,
    frequencies=data_frequencies,
    channel_names=channel_names,
    is_diagonal=True,
)
noisemodel = noise_model(sdm)

Finally, we have the likelihood callable, which can be fed into an MCMC sampler. Let’s check the result with the injected parameters (it should be close to 0).

from typed_lisa_toolkit import whittle
from typed_lisa_toolkit.types import FDNoiseModel, FSData


def src_wrap(x: np.ndarray):
    """Return a GB source."""
    # If x is of shape (n, ) reshape it to (1,n)
    if len(x.shape) == 1:
        x = x.reshape((1, -1))
    params = np.split(x, axis=1, indices_or_sections=x.shape[1])
    src = GBSource(*params)
    return src


class LogLikelihood:
    """Log likelihood computation class."""

    def __init__(
        self,
        data: FSData,
        noisemodel: FDNoiseModel,
        waveform_generator: GBGen,
    ):
        self.likelihood = whittle(data, noisemodel)
        self.waveform_generator = waveform_generator

    def __call__(self, x: np.ndarray):
        """Return the log-likelihood."""
        source = src_wrap(x)
        return self.likelihood.get_log_likelihood(self.waveform_generator(source))


log_likelihood = LogLikelihood(inj_data, noisemodel.reset(), gb_gen)

log_likelihood(
    np.array(list(inj_source)),
).sum()  # Should be zero modulo numerical errors for the injected source
np.float64(-0.0008253246055334573)