import re
import pandas as pd

file_path = "rate_coef_Sn.dat"

# Short table tags found in the data file, mapped to the descriptive base
# names used for the output CSV files.
full_name_map = {
    "ci": "electron_impact_direct_ionization",
    "ea": "electron_impact_excitation_autoionization",
    "rr": "radiative_recombination",
    "dr": "dielectronic_recombination",
    "bb": "bound_bound_radiative_loss_rates",
    "bf": "recombination_radiative_loss_rates",
    "csd": "charge_state_distribution"
}

# A table starts at a line such as "ci temp Sn0+ Sn1+ ...": one of the known
# tags at the very start of the line, followed by whitespace and "temp".
header_pattern = re.compile(r'^(ci|ea|rr|dr|bb|bf|csd)\s+temp')


def _normalize_headers(raw_headers):
    """Return column labels: 'temp' is kept, any other header keeps only its digits."""
    return ['temp' if h == 'temp' else re.sub(r'\D', '', h) for h in raw_headers]


def _build_table(raw_headers, rows):
    """Build the DataFrame for one parsed table.

    Args:
        raw_headers: Header tokens of the table, without the leading table tag.
        rows: List of token lists, each exactly ``len(raw_headers)`` long.

    Returns:
        A DataFrame whose index is the first column converted with
        ``pd.to_numeric(..., errors='coerce')`` (unparseable values become
        NaN) and whose remaining columns hold the original string tokens.
    """
    columns = _normalize_headers(raw_headers)
    frame = pd.DataFrame(rows, columns=columns)
    # Build the index from a freshly converted Series instead of assigning
    # through ``iloc``: an in-place ``iloc`` write into an object column can
    # leave the column as object dtype rather than numeric.
    index_values = pd.to_numeric(frame.iloc[:, 0], errors='coerce')
    table = frame.iloc[:, 1:].copy()
    table.index = pd.Index(index_values, name=columns[0])
    return table


with open(file_path, "r") as f:
    content = f.read()

lines = content.splitlines()

# Parsed tables keyed by their short tag; a repeated tag overwrites the
# earlier table.
tables = {}
current_table = None
headers = []
data_rows = []

for line in lines:
    stripped = line.strip()
    if header_pattern.match(line):
        # A new header closes the table collected so far (if it has any rows).
        if current_table and headers and data_rows:
            tables[current_table] = _build_table(headers, data_rows)

        parts = stripped.split()
        current_table = parts[0]
        headers = parts[1:]
        data_rows = []
    elif current_table and stripped and not stripped.startswith("#"):
        parts = stripped.split()
        if len(parts) == len(headers):
            data_rows.append(parts)
        elif len(parts) == len(headers) + 1:
            # NOTE(review): a row with one extra token is assumed to carry a
            # leading tag, mirroring the header line's "<tag> temp ..." layout,
            # so the first token is dropped. Keeping all tokens would make the
            # DataFrame constructor raise on the column-count mismatch.
            # Confirm against the real file format.
            data_rows.append(parts[1:])
        # Rows of any other width are ignored.

# Flush the final table through the same helper as every other table, so its
# column labels are normalized identically.
if current_table and headers and data_rows:
    tables[current_table] = _build_table(headers, data_rows)

# Write each table to "<descriptive name>.csv"; tags without a mapping fall
# back to the tag itself. csv_paths records tag -> written path.
csv_paths = {}
for name, df in tables.items():
    full_name = full_name_map.get(name, name)
    path = f"{full_name}.csv"
    df.to_csv(path)
    csv_paths[name] = path