"""
DEER Peak 3-Day Period Analysis - CZ2025
Identifies the hottest consecutive 3-weekday period per California climate zone.

- 2009 calendar for weekdays and holidays (DEER2023 / Title 24).
- Equal-weight index: PeakT + AvgT72 + AvgT_noon_6 (peak temp, 3-day average, noon-6pm average).
- Reads CZ2025 EPW files from CZ2025_WeatherFiles folder.
"""

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os

# ---------------------------------------------------------------------------
# Configuration: WEATHER_DIR = folder containing the 16 CZ2025 .epw files
# ---------------------------------------------------------------------------
# Paths are resolved relative to this script's location: the weather folder is
# expected to be a sibling of the script's directory (i.e. inside the script's
# parent folder), and the output CSV is written next to the script itself.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))  # directory holding this script
_BASE_DIR = os.path.dirname(_SCRIPT_DIR)  # parent of the script directory
WEATHER_DIR = os.path.join(_BASE_DIR, "CZ2025_WeatherFiles")  # EPW input folder
OUTPUT_CSV = os.path.join(_SCRIPT_DIR, "peakperspec_cz2025.csv")  # results CSV path

# Data dictionary: short description for each CSV column (written as comment lines at top of CSV)
# (column, description) pairs; list order defines the order of the "#" header
# comment lines emitted by main() before the CSV body.
CSV_COLUMN_DESCRIPTIONS = [
    ("BldgLoc", "California climate zone ID (CZ01 through CZ16)"),
    ("PkDay", "Day of year (1-366) for the start date of the peak 3-day period (2009)"),
    ("PkHr", "Start hour of the 3-day window (always 0 = midnight)"),
    ("PeakTempHr", "Hour of day (0-23) when the peak dry-bulb temperature occurred within the 72-hour period"),
    ("PkDayOffset", "Which day in the 3-day window had PeakT: 0=first day (PkDate), 1=second, 2=third"),
    ("PkDate", "Start date of the peak 3-day period (YYYY-MM-DD, 2009)"),
    ("SourceWeatherSet", "EPW weather file name for this climate zone"),
    ("DayOfWeek", "Day of week for PkDate (Monday, Tuesday, Wednesday)"),
    ("PeakT", "Peak dry-bulb temperature in the 72-hour period (°C)"),
    ("PeakT_F", "Peak dry-bulb temperature (°F)"),
    ("AvgT72", "Average dry-bulb temperature over the 72-hour period (°C)"),
    ("AvgT72_F", "Average dry-bulb over 72 hours (°F)"),
    ("AvgT_noon_6", "Average dry-bulb from noon to 6 PM over the 3 days (°C)"),
    ("AvgT_noon_6_F", "Average dry-bulb noon-6 PM (°F)"),
    ("TotalScore", "Equal-weight index: PeakT + AvgT72 + AvgT_noon_6 (°C)"),
    ("TotalScore_F", "Same index in °F (PeakT_F + AvgT72_F + AvgT_noon_6_F)"),
]

# Observed holidays in 2009 (from DEER/Title 24 2009 calendar)
# Stored as midnight datetimes so membership tests against datetime(2009, m, d)
# in is_valid_3day_start_2009 compare equal (both sides are at 00:00).
CA_HOLIDAYS_2009 = [
    datetime(2009, 1, 1),    # New Year's Day
    datetime(2009, 1, 19),  # Martin Luther King Day
    datetime(2009, 2, 16),  # Presidents Day
    datetime(2009, 5, 25),  # Memorial Day
    datetime(2009, 7, 3),   # July 4 observed (July 4 was Saturday)
    datetime(2009, 9, 7),   # Labor Day
    datetime(2009, 10, 12), # Columbus Day
    datetime(2009, 11, 11), # Veterans Day
    datetime(2009, 11, 26), # Thanksgiving Day
    datetime(2009, 12, 25), # Christmas
]

# CZ2025 station mapping: BldgLoc -> EPW filename (must match WEATHER_DIR contents)
# Insertion order is CZ01..CZ16, so results iterate (and the output CSV sorts)
# in climate-zone order without any explicit sorting.
CZ2025_EPW_FILES = {
    "CZ01": "CA_CZ01_ARCATA-AP_725945_CZ2025.epw",
    "CZ02": "CA_CZ02_SANTA-ROSA(AWOS)_724957_CZ2025.epw",
    "CZ03": "CA_CZ03_OAKLAND-METRO-AP_724930_CZ2025.epw",
    "CZ04": "CA_CZ04_PASO-ROBLES-MUNI-AP_723965_CZ2025.epw",
    "CZ05": "CA_CZ05_SANTA-MARIA-PUBLIC-AP_723940_CZ2025.epw",
    "CZ06": "CA_CZ06_LOS-ANGELES-IAP_722950_CZ2025.epw",
    "CZ07": "CA_CZ07_SAN-DIEGO-LINDBERGH-FIELD_722900_CZ2025.epw",
    "CZ08": "CA_CZ08_FULLERTON-MUNI-AP_722976_CZ2025.epw",
    "CZ09": "CA_CZ09_BURBANK-GLNDLE-PASAD-AP_722880_CZ2025.epw",
    "CZ10": "CA_CZ10_RIVERSIDE-MUNI_722869_CZ2025.epw",
    "CZ11": "CA_CZ11_RED-BLUFF-MUNI-AP_725910_CZ2025.epw",
    "CZ12": "CA_CZ12_SACRAMENTO-EXECUTIVE-AP_724830_CZ2025.epw",
    "CZ13": "CA_CZ13_FRESNO-YOSEMITE-IAP_723890_CZ2025.epw",
    "CZ14": "CA_CZ14_PALMDALE-AP_723820_CZ2025.epw",
    "CZ15": "CA_CZ15_PALM-SPRINGS-IAP_722868_CZ2025.epw",
    "CZ16": "CA_CZ16_BLUE-CANYON-AP_725845_CZ2025.epw",
}

# The 35 standard EPW data-record fields, in file order, used as column names
# when parsing the hourly rows. Only the date/time fields and dry_bulb_temp
# are consumed downstream; the rest are carried along for completeness.
EPW_COLUMNS = [
    "year", "month", "day", "hour", "minute", "data_source", "dry_bulb_temp",
    "dew_point_temp", "relative_humidity", "atmospheric_pressure",
    "extraterrestrial_horizontal_radiation", "extraterrestrial_direct_normal_radiation",
    "horizontal_infrared_radiation", "global_horizontal_radiation",
    "direct_normal_radiation", "diffuse_horizontal_radiation",
    "global_horizontal_illuminance", "direct_normal_illuminance", "diffuse_horizontal_illuminance",
    "zenith_luminance", "wind_direction", "wind_speed", "total_sky_cover", "opaque_sky_cover",
    "visibility", "ceiling_height", "present_weather_observation", "present_weather_codes",
    "precipitable_water", "aerosol_optical_depth", "snow_depth", "days_since_last_snowfall",
    "albedo", "liquid_precipitation_depth", "liquid_precipitation_quantity",
]


def celsius_to_fahrenheit(c):
    """Convert a temperature from degrees Celsius to degrees Fahrenheit."""
    return 32 + (c * 9) / 5


def read_epw_from_path(epw_path):
    """Load an EPW weather file and return a datetime-indexed DataFrame.

    The first 8 lines of an EPW file are header records and are skipped;
    the hourly data rows are parsed using the standard EPW field names.
    """
    raw = pd.read_csv(epw_path, skiprows=8, header=None, names=EPW_COLUMNS)
    return _epw_to_dataframe(raw)


def _epw_to_dataframe(df):
    df["datetime"] = pd.to_datetime(
        df[["year", "month", "day", "hour"]].astype(int), format="%Y%m%d%H"
    )
    df.set_index("datetime", inplace=True)
    df.sort_index(inplace=True)
    return df


def is_valid_3day_start_2009(month, day):
    """
    True if (month, day) in 2009 can start a 3-day period: Mon/Tue/Wed and
    all three days are weekdays and not in CA_HOLIDAYS_2009.
    """
    start = datetime(2009, month, day)
    # A valid window must begin on Monday (0), Tuesday (1), or Wednesday (2)
    # so all three days fall within the same work week.
    if start.weekday() not in (0, 1, 2):
        return False
    window = (start + timedelta(days=offset) for offset in range(3))
    return all(
        d.weekday() <= 4 and d not in CA_HOLIDAYS_2009 for d in window
    )


def get_72h_data(df, start_month, start_day, start_hour=0):
    """Return 72 consecutive hours from a TMY frame by (month, day, hour).

    TMY files stitch together months from different calendar years, so rows
    are matched on (month, day, hour) only (year-agnostic) and then re-ordered
    to follow the requested window. Year 2000 (a leap year) drives the date
    walk so any valid month/day can be enumerated.

    Returns a 72-row DataFrame in window order (with a helper '_seq' column),
    or None when the window is not fully present in *df*.
    """
    cursor = datetime(2000, start_month, start_day, start_hour)
    needed = []
    for _ in range(72):
        needed.append((cursor.month, cursor.day, cursor.hour))
        cursor += timedelta(hours=1)
    # Dict gives O(1) membership and the window rank in one structure.
    order = {mdh: i for i, mdh in enumerate(needed)}
    idx = df.index
    # Vectorized key extraction instead of a row-wise df.apply: one pass over
    # the index (O(n)) rather than O(n * 72) list-membership checks per row.
    keys = list(zip(idx.month, idx.day, idx.hour))
    period = df[[k in order for k in keys]]
    if len(period) < 72:
        return None
    period = period.copy()
    period["_seq"] = [order[(t.month, t.day, t.hour)] for t in period.index]
    return period.sort_values("_seq")


def score_3day(df, start_month, start_day):
    """
    Score one 3-day window with the equal-weight index (all °C):
    TotalScore = PeakT + AvgT72 + AvgT_noon_6.

    Returns a dict with PeakT, AvgT72, AvgT_noon_6, TotalScore, PeakTempHr
    (hour of day 0-23 of the peak), and PkDayOffset (0/1/2 = which day of
    the window held the peak), or None when the 72-hour window is incomplete.
    """
    window = get_72h_data(df, start_month, start_day)
    if window is None:
        return None

    temps = window["dry_bulb_temp"]
    peak_t = temps.max()
    avg_72 = temps.mean()
    # Afternoon average: hours 12:00 through 17:00 across all three days.
    avg_noon_6 = temps[window.index.hour.isin(range(12, 18))].mean()

    when_peak = temps.idxmax()
    first_day = window.index.min().normalize()

    return {
        "PeakT": peak_t,
        "AvgT72": avg_72,
        "AvgT_noon_6": avg_noon_6,
        "TotalScore": peak_t + avg_72 + avg_noon_6,
        "PeakTempHr": when_peak.hour,
        "PkDayOffset": (when_peak.normalize() - first_day).days,
    }


def find_peak_3day(df, climate_zone, weather_filename):
    """Find the June–Sept 3-day period with the highest equal-weight index.

    Candidate start dates follow the 2009 calendar (Mon/Tue/Wed starts, no
    weekends or holidays). Because scoring runs against the June–September
    slice, a window spilling past Sept 30 comes back short and is skipped.
    Ties keep the earlier date (strict > required to replace the leader).

    Returns a fully-populated result row dict, or None if no window scored.
    """
    summer = df[(df.index.month >= 6) & (df.index.month <= 9)]
    best_row = None
    top_score = -np.inf

    for day in summer.index.normalize().unique():
        month, dom = day.month, day.day
        if not is_valid_3day_start_2009(month, dom):
            continue
        metrics = score_3day(summer, month, dom)
        if metrics is None:
            continue
        if metrics["TotalScore"] <= top_score:
            continue

        top_score = metrics["TotalScore"]
        start_2009 = datetime(2009, month, dom)
        peak_f = celsius_to_fahrenheit(metrics["PeakT"])
        avg72_f = celsius_to_fahrenheit(metrics["AvgT72"])
        noon6_f = celsius_to_fahrenheit(metrics["AvgT_noon_6"])
        best_row = {
            "BldgLoc": climate_zone,
            "PkDay": start_2009.timetuple().tm_yday,
            "PkHr": 0,
            "PeakTempHr": metrics["PeakTempHr"],
            "PkDayOffset": metrics["PkDayOffset"],
            "PkDate": f"2009-{month:02d}-{dom:02d}",
            "SourceWeatherSet": weather_filename,
            "DayOfWeek": start_2009.strftime("%A"),
            "PeakT": round(metrics["PeakT"], 2),
            "PeakT_F": round(peak_f, 2),
            "AvgT72": round(metrics["AvgT72"], 2),
            "AvgT72_F": round(avg72_f, 2),
            "AvgT_noon_6": round(metrics["AvgT_noon_6"], 2),
            "AvgT_noon_6_F": round(noon6_f, 2),
            "TotalScore": round(metrics["TotalScore"], 2),
            "TotalScore_F": round(peak_f + avg72_f + noon6_f, 2),
        }
    return best_row


def main():
    """Run the peak-period analysis for every climate zone and write the CSV.

    For each CZ2025 EPW file found in WEATHER_DIR, reads the weather data,
    finds the peak 3-day window, and collects a result row. A data-dictionary
    comment header is written above the CSV body in OUTPUT_CSV.
    """
    print("DEER Peak 3-Day Period Analysis (CZ2025)")
    print("2009 calendar, full holiday list, equal-weight index (PeakT + AvgT72 + AvgT_noon_6)\n")
    print(f"Weather folder: {WEATHER_DIR}")
    print(f"Output: {OUTPUT_CSV}\n")

    if not os.path.isdir(WEATHER_DIR):
        print(f"Error: WEATHER_DIR not found: {WEATHER_DIR}")
        return

    rows = []
    for zone, epw_name in CZ2025_EPW_FILES.items():
        epw_path = os.path.join(WEATHER_DIR, epw_name)
        if not os.path.isfile(epw_path):
            print(f"  {zone}: file not found {epw_name}")
            continue
        try:
            weather = read_epw_from_path(epw_path)
        except Exception as e:
            # Keep going: one bad file should not abort the other zones.
            print(f"  {zone}: error reading {epw_name}: {e}")
            continue
        best = find_peak_3day(weather, zone, epw_name)
        if not best:
            print(f"  {zone}: no valid 3-day period")
            continue
        rows.append(best)
        print(f"  {zone}: {best['PkDate']} ({best['DayOfWeek']}) Score={best['TotalScore']:.2f}")

    table = pd.DataFrame(rows)
    # utf-8-sig BOM so the °-containing descriptions render correctly in Excel.
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8-sig") as f:
        f.write("# Data dictionary (column descriptions)\n")
        for col, desc in CSV_COLUMN_DESCRIPTIONS:
            f.write(f"#   {col}: {desc}\n")
        f.write("\n")
        table.to_csv(f, index=False)
    print(f"\nSaved: {OUTPUT_CSV}")
    print(table.to_string(index=False))


# Run the analysis only when executed as a script (not when imported).
if __name__ == "__main__":
    main()
