In [None]:
#!pip install git+https://gitlab.cern.ch/lua/acc-bpt-nx-api.git

In [None]:
from __future__ import annotations

import datetime
import os
import pathlib
import re
import sys

import dateutil.parser
import numpy as np
import pandas as pd
import pyda
import pyda_lsa
import pyrbac
import pytz

sys.path.append("MD20250822")
from acc_bpt_nx_api.utils_spark_config import SparkConfig, get_nxcals_spark_session

from nxcals_helpers import different_type_variables_to_dataframe

In [68]:
MACOS_JAVA_HOME = "/opt/homebrew/Cellar/openjdk@11/11.0.28"
if pathlib.Path(MACOS_JAVA_HOME).exists():
    os.environ["JAVA_HOME"] = MACOS_JAVA_HOME

In [69]:
spark = get_nxcals_spark_session(SparkConfig(app_name="chroma_MD20250822")).spark

In [70]:
def convert_to_utc_naive(time_str: str | datetime.datetime) -> datetime.datetime:
    local = dateutil.parser.parse(time_str) if isinstance(time_str, str) else time_str

    if local.tzinfo is None:
        local = pytz.timezone("Europe/Zurich").localize(
            local
        )  # Assuming the times are in the 'Europe/Zurich' timezone
    utc_time = local.astimezone(pytz.utc)
    return utc_time.replace(tzinfo=None)  # Make the datetime naive

In [87]:
# Check times in https://logbook.cern.ch/elogbook-server/GET/showEventInLogbook/4362106 (11/08/2025 SPS MD)

accelerator = "SPS"
timing_user = "MD4"
selector = f"{accelerator}.USER.{timing_user}"
lsa_cycle = "MD_SHiP_L1230_East_Extraction_2025_V2"

DATE = "2025-08-22"
start_time = f"{DATE} 12:50:40.000+02:00"
end_time = f"{DATE} 13:09:35.000+02:00"
start_time = dateutil.parser.parse(start_time)
end_time = dateutil.parser.parse(end_time)

# start_time = convert_to_utc_naive(start_time)
# end_time = convert_to_utc_naive(end_time)

In [72]:
da = pyda.SimpleClient(
    provider=pyda_lsa.LsaProvider(
        rbac_token=pyrbac.AuthenticationClient().login_location()
    )
)
endpoint = pyda_lsa.LsaEndpoint.from_str("SpsLowLevelRF/DpOverPOffset#value")
context = pyda_lsa.LsaCycleTimespanContext(
    cycle=lsa_cycle, start=start_time, end=end_time
)

In [73]:
response = da.get(endpoint=endpoint, context=context)
response.data["value"]

Unnamed: 0,trim_time,value,comment
0,2025-08-22 12:50:39.916,<pyda.data.DiscreteFunction object at 0x7f6cf7...,MultiQ dp/p offset trim -4.00 permill
1,2025-08-22 12:51:52.304,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill
2,2025-08-22 12:53:07.885,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill
3,2025-08-22 12:54:23.503,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill
4,2025-08-22 12:55:39.103,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill
5,2025-08-22 12:56:54.666,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill
6,2025-08-22 12:58:10.327,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill
7,2025-08-22 12:59:25.922,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill
8,2025-08-22 13:00:41.520,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill
9,2025-08-22 13:01:57.108,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill


In [74]:
pattern = re.compile(r"MultiQ dp/p offset trim ([+-]?\d+(?:\.\d+)?)(?: permill)?")

In [75]:
# extract dp/p with regex

df_dpp = response.data["value"].copy()

df_dpp = df_dpp[df_dpp["comment"].astype(str).str.contains(pattern)]
df_dpp["dp/p"] = df_dpp["comment"].astype(str).str.extract(pattern)
df_dpp["dp/p"] = np.cumsum(pd.to_numeric(df_dpp["dp/p"], errors="coerce")) * 1e-3

  df_dpp = df_dpp[df_dpp["comment"].astype(str).str.contains(pattern)]


In [76]:
df_dpp

Unnamed: 0,trim_time,value,comment,dp/p
0,2025-08-22 12:50:39.916,<pyda.data.DiscreteFunction object at 0x7f6cf7...,MultiQ dp/p offset trim -4.00 permill,-0.004
1,2025-08-22 12:51:52.304,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill,-0.0035
2,2025-08-22 12:53:07.885,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill,-0.003
3,2025-08-22 12:54:23.503,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill,-0.0025
4,2025-08-22 12:55:39.103,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill,-0.002
5,2025-08-22 12:56:54.666,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill,-0.0015
6,2025-08-22 12:58:10.327,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill,-0.001
7,2025-08-22 12:59:25.922,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill,-0.0005
8,2025-08-22 13:00:41.520,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill,0.0
9,2025-08-22 13:01:57.108,<pyda.data.DiscreteFunction object at 0x7f6b8e...,MultiQ dp/p offset trim 0.50 permill,0.0005


In [77]:
variables = [
    "SPS.LSA:CYCLE",
    "SPS.TGM:USER",
    "SPS.TGM:BEAMID",
    "SPS.TGM:DDEST",
    # 'SPS.BCTDC24.51454:Acquisition:measStamp', # 5ms resolution starting from 0 being injection
    # 'SPS.BCTDC24.51454:Acquisition:measStamp_unitExponent',
    # 'SPS.BCTDC24.51454:Acquisition:totalIntensity',
    # 'SPS.BCTDC24.51454:Acquisition:totalIntensity_unitExponent',
    # 'SPS.BCTDC.51454:TOTAL_INTENSITY',
    "SPSQC:TOTAL_INJECTED_INTENSITY",  # total injected intensity
    # 'SPSQC:cpsTotalExtractedIntensity',   # CPS Total extracted intensity
    # 'SPSQC:INJECTION_INTENSITY', # intensity of each injection
    # 'SPSQC:cpsExtractedIntensities', # extracted intensities
    "SPS.BQ.QC:Acquisition:estimatedTuneH",
    "SPS.BQ.QC:Acquisition:estimatedTuneV",
    "SPS.BQ.QC:Acquisition:measStamp",
    "SPS.BQ.QC:Acquisition:doneNbOfMeas",
    "SPS.BQ.CONT:ContinuousAcquisition:rawDataH",
    "SPS.BQ.CONT:ContinuousAcquisition:rawDataV",
]

df_raw = different_type_variables_to_dataframe(
    convert_to_utc_naive(start_time),
    convert_to_utc_naive(end_time),
    variables,
    merge_timestamps=True,
    on="nxcals_timestamp",
    direction="forward",
    tolerance="1.2 seconds",
    spark=spark,
)

                                                                                

In [78]:
df = df_raw[
    df_raw["SPS.LSA:CYCLE"] == lsa_cycle
]  # select the rows that correspond to your cycle
# df = df_raw[df_raw['SPS.TGM:DDEST']=='FTARGET'] # select the rows that correspond to your cycle

# df.dropna(subset=['SPS.BCTDC.51456:Acquisition:measStamp'], inplace=True)
# df.dropna(subset=['SPSQC:TOTAL_INJECTED_INTENSITY'], inplace=True)
df["nxcals_timestamp"] += pd.Timedelta(2, unit="hours")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["nxcals_timestamp"] += pd.Timedelta(2, unit="hours")


In [79]:
offset_df = df_dpp[["trim_time", "dp/p"]].rename(
    columns={"dp/p": "dp_over_p", "trim_time": "timestamp"}
)
offset_df = offset_df.sort_values("timestamp")
df = df.sort_values("nxcals_timestamp")

In [80]:
# Merge
df = pd.merge_asof(
    df,
    offset_df,
    left_on="nxcals_timestamp",
    right_on="timestamp",
    direction="backward",
)
# df

## Some basic post-process

In [81]:
df2 = df.copy()

In [82]:
# Remove unwanted stuff
df2 = df2.drop("SPS.LSA:CYCLE", axis=1)
df2 = df2.drop("SPS.TGM:USER", axis=1)
df2 = df2.drop("SPS.TGM:BEAMID", axis=1)
df2 = df2.drop("SPS.TGM:DDEST", axis=1)
df2 = df2.drop("index", axis=1)
df2 = df2.drop("timestamp", axis=1)

In [83]:
# Mask intensity
df2["injected_intensity"] = df2["SPSQC:TOTAL_INJECTED_INTENSITY"]
df2 = df2.drop("SPSQC:TOTAL_INJECTED_INTENSITY", axis=1)
df2 = df2[df2["injected_intensity"] > 1e11]

In [84]:
# Extract BBQ tunes (not really needed since tune is re-calculated from raw data using nafflib)
df2["tuneH"] = df2.apply(
    lambda row: row["SPS.BQ.QC:Acquisition:estimatedTuneH"]["elements"], axis=1
)
df2 = df2.drop("SPS.BQ.QC:Acquisition:estimatedTuneH", axis=1)
df2["tuneV"] = df2.apply(
    lambda row: row["SPS.BQ.QC:Acquisition:estimatedTuneV"]["elements"], axis=1
)
df2 = df2.drop("SPS.BQ.QC:Acquisition:estimatedTuneV", axis=1)
df2["tunect"] = df2.apply(
    lambda row: row["SPS.BQ.QC:Acquisition:measStamp"]["elements"], axis=1
)
df2 = df2.drop("SPS.BQ.QC:Acquisition:measStamp", axis=1)
df2["tuneNbOfMeas"] = df2["SPS.BQ.QC:Acquisition:doneNbOfMeas"]
df2 = df2.drop("SPS.BQ.QC:Acquisition:doneNbOfMeas", axis=1)

# Get raw data
df2["rawDataH"] = df2.apply(
    lambda row: row["SPS.BQ.CONT:ContinuousAcquisition:rawDataH"]["elements"], axis=1
)
df2 = df2.drop("SPS.BQ.CONT:ContinuousAcquisition:rawDataH", axis=1)
df2["rawDataV"] = df2.apply(
    lambda row: row["SPS.BQ.CONT:ContinuousAcquisition:rawDataV"]["elements"], axis=1
)
df2 = df2.drop("SPS.BQ.CONT:ContinuousAcquisition:rawDataV", axis=1)

# Get time (?)
Trev = 1 / 43.2786e3 * 1e3  # in ms
df2["rawDataH_beamtime"] = df2.apply(
    lambda row: np.arange(0, len(row["rawDataH"]), 1) * Trev, axis=1
)
df2["rawDataV_beamtime"] = df2.apply(
    lambda row: np.arange(0, len(row["rawDataV"]), 1) * Trev, axis=1
)

In [88]:
# Chroma calculation in 02_compute_chroma.ipynb
# save_dataframe(df2, './', 'df_chroma_data_20250811_HV_dpp_pm4e-3.pkl')
# save_dataframe(df2, './', 'df_chroma_data_20250811_HV_dpp_pm4e-3_woMomentumRamp.pkl')
df2.to_parquet(
    f"df_chroma_data_{DATE.replace('-', '')}_HV_dpp_pm4e-3_woMomentumRamp_flatMD1.parquet"
)