Source code for rsoft_cad.femsim.data_processing

import os
import re

import pandas as pd

from typing import Dict, List, Tuple
from collections import defaultdict
from rsoft_cad.utils import find_files_by_extension, read_nef_file



[docs]
def extract_run_names(file_paths: List[str]) -> List[str]:
    """
    Extract 'run_XXX' parts from a list of file paths.

    Only the base name of each path is inspected, and it must be exactly
    ``run_<digits>.nef``. Paths that do not fit are skipped, so the result
    may be shorter than ``file_paths``; the remaining entries keep the
    order of ``file_paths``.

    Args:
        file_paths (List[str]): List of file paths containing run_XXX.nef files

    Returns:
        List[str]: List of extracted run names in the format 'run_XXX'
    """
    # fullmatch (rather than match) so look-alikes such as "run_001.nef.bak"
    # or "run_001.nefx" are not mistaken for run files.
    run_file_pattern = re.compile(r"(run_\d+)\.nef")

    matches = (
        run_file_pattern.fullmatch(os.path.basename(path)) for path in file_paths
    )
    return [match.group(1) for match in matches if match]




[docs]
def get_z_positions_from_runs(
    dataframe: pd.DataFrame, file_paths: List[str]
) -> Tuple[List[float], List[str]]:
    """
    Extract z_pos values from a DataFrame based on run names found in file paths.

    The z_pos values are returned in the order of the run names (i.e. the
    order of ``file_paths``), not in the row order of ``dataframe``.

    Args:
        dataframe (pd.DataFrame): DataFrame with 'filename' and 'z_pos' columns
        file_paths (List[str]): List of file paths containing run_XXX.nef files

    Returns:
        Tuple[List[float], List[str]]: A tuple containing:
            - List of z_pos values, one per run name that has a row in
              ``dataframe``, in run-name order. Run names without a row
              contribute no value, so this list can be shorter than the
              second one.
            - List of run names extracted from file paths

    Raises:
        KeyError: If ``dataframe`` lacks a 'filename' or 'z_pos' column.
    """
    # Extract run names from file paths
    run_names = extract_run_names(file_paths)

    # Build a name -> z_pos lookup. A boolean-mask filter would hand rows back
    # in the DataFrame's own order, which need not match the file order.
    z_pos_by_run = {}
    for name, z_pos in zip(dataframe["filename"].tolist(), dataframe["z_pos"].tolist()):
        # If a filename is listed more than once, the first row wins.
        z_pos_by_run.setdefault(name, z_pos)

    z_positions = [z_pos_by_run[name] for name in run_names if name in z_pos_by_run]

    return z_positions, run_names




[docs]
def process_nef_files(
    folder_path: str, include_subfolders: bool = True
) -> Tuple[Dict[int, List[float]], Dict[int, List[float]], List[str], List[str]]:
    """
    Process multiple .nef files and extract relevant data.

    Files are handled in the order returned by ``find_files_by_extension``.
    A file that cannot be read or parsed is reported on stdout and left out
    of every returned collection, so the file names, file paths and
    per-index data only describe files that were processed successfully.

    Args:
        folder_path (str): Path to the folder containing .nef files
        include_subfolders (bool): If True, search for files in subfolders as well

    Returns:
        Tuple containing:
            - Dictionary of real index data by mode index
            - Dictionary of imaginary index data by mode index
            - List of file names (base name without extension)
            - List of file paths of the successfully processed files
    """
    # Find all .nef files
    nef_files = find_files_by_extension(folder_path, ".nef", include_subfolders)

    if not nef_files:
        print(f"No .nef files found in {folder_path}")
        return {}, {}, [], []

    print(f"Found {len(nef_files)} .nef files")

    # Dictionaries to organize data by index
    index_data_real = defaultdict(list)
    index_data_imag = defaultdict(list)
    file_names = []
    processed_paths = []

    for file_path in nef_files:
        try:
            data = read_nef_file(file_path)

            # Collect this file's rows first so that a malformed file cannot
            # leave half of its values in the accumulators.
            rows = [
                (idx, data["n_eff_real"][j], data["n_eff_imag"][j])
                for j, idx in enumerate(data["indices"])
            ]
        except Exception as e:
            # Best effort: report the problem and carry on with the next file.
            print(f"Error processing {file_path}: {e}")
            continue

        for idx, n_eff_real, n_eff_imag in rows:
            index_data_real[idx].append(n_eff_real)
            index_data_imag[idx].append(n_eff_imag)

        # Record the file only once its data is stored, keeping names and
        # paths lined up with the data lists.
        filename = os.path.basename(file_path)
        file_names.append(os.path.splitext(filename)[0])  # Remove extension
        processed_paths.append(file_path)

    return index_data_real, index_data_imag, file_names, processed_paths




[docs]
def create_dataframe_from_nef_data(
    index_data: Dict[int, List[float]],
    x_values: List[float],
    index_id: int,
) -> pd.DataFrame:
    """
    Create a DataFrame from NEF data for a specific mode index.

    If ``x_values`` and the data for ``index_id`` differ in length, both are
    truncated to the shorter of the two so that the columns always line up.

    Args:
        index_data (Dict[int, List[float]]): Dictionary of data by mode index
        x_values (List[float]): X-axis values (e.g., taper lengths)
        index_id (int): Mode index to extract

    Returns:
        pd.DataFrame: DataFrame with 'taper_length' and 'n_eff' columns, or
            an empty DataFrame (no columns) when ``index_id`` is missing
            from ``index_data`` or has no data.
    """
    # .get() does not insert a key, so a defaultdict argument is not mutated.
    y_values = index_data.get(index_id)
    if not y_values:
        return pd.DataFrame()

    # Trim both columns to the common length; trimming only the x values
    # would make pandas raise when fewer x values than data points exist.
    n_points = min(len(x_values), len(y_values))

    return pd.DataFrame(
        {
            "taper_length": x_values[:n_points],
            "n_eff": y_values[:n_points],
        }
    )




[docs]
def create_axis_values(
    folder_path: str,
    nef_files: List[str],
    file_names: List[str],
    use_filename_as_x: bool,
) -> Tuple[List[float], List[str]]:
    """
    Create x-axis values and labels for plotting.

    The z positions are read from ``x_values.csv`` in the parent directory
    of ``folder_path``. If that file cannot be read, or the lookup fails for
    any other reason, the function reports the error on stdout and falls
    back to ``0 .. len(file_names) - 1`` as x values.

    Args:
        folder_path (str): Path to the folder containing .nef files
        nef_files (List[str]): List of .nef file paths
        file_names (List[str]): List of file names without extensions
        use_filename_as_x (bool): If True, use filenames as x-axis labels;
            otherwise label with the x values themselves

    Returns:
        Tuple[List[float], List[str]]: x-values and x-labels for plotting
    """
    # Get parent directory of the data folder. normpath drops a trailing
    # separator first; without it "expt/data/" would resolve to "expt/data"
    # itself and the CSV would be looked up in the wrong place.
    expt_dir = os.path.dirname(os.path.normpath(folder_path))

    try:
        # Read the CSV containing z-position data
        x_values_df = pd.read_csv(os.path.join(expt_dir, "x_values.csv"))

        # Get z-positions from the data frame
        z_positions, _ = get_z_positions_from_runs(x_values_df, nef_files)
    except Exception as e:
        print(f"Error creating axis values: {e}")
        print("Using default numerical indices for x-axis.")

        # Fallback to using numerical indices
        x_values = list(range(len(file_names)))
        x_labels = file_names if use_filename_as_x else [str(i) for i in x_values]

        return x_values, x_labels

    # NOTE(review): z_positions can be shorter than file_names if some files
    # have no z_pos entry; the two are returned as-is without reconciling.
    x_labels = file_names if use_filename_as_x else [str(x) for x in z_positions]

    return z_positions, x_labels