""" DEER Peak 3-Day Period Analysis - CZ2025 Identifies the hottest consecutive 3-weekday period per California climate zone. - 2009 calendar for weekdays and holidays (DEER2023 / Title 24). - Equal-weight index: PeakT + AvgT72 + AvgT_noon_6 (peak temp, 3-day average, noon-6pm average). - Reads CZ2025 EPW files from CZ2025_WeatherFiles folder. """ import pandas as pd import numpy as np from datetime import datetime, timedelta import os # --------------------------------------------------------------------------- # Configuration: WEATHER_DIR = folder containing the 16 CZ2025 .epw files # --------------------------------------------------------------------------- _SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) _BASE_DIR = os.path.dirname(_SCRIPT_DIR) WEATHER_DIR = os.path.join(_BASE_DIR, "CZ2025_WeatherFiles") OUTPUT_CSV = os.path.join(_SCRIPT_DIR, "peakperspec_cz2025.csv") # Data dictionary: short description for each CSV column (written as comment lines at top of CSV) CSV_COLUMN_DESCRIPTIONS = [ ("BldgLoc", "California climate zone ID (CZ01 through CZ16)"), ("PkDay", "Day of year (1-366) for the start date of the peak 3-day period (2009)"), ("PkHr", "Start hour of the 3-day window (always 0 = midnight)"), ("PeakTempHr", "Hour of day (0-23) when the peak dry-bulb temperature occurred within the 72-hour period"), ("PkDayOffset", "Which day in the 3-day window had PeakT: 0=first day (PkDate), 1=second, 2=third"), ("PkDate", "Start date of the peak 3-day period (YYYY-MM-DD, 2009)"), ("SourceWeatherSet", "EPW weather file name for this climate zone"), ("DayOfWeek", "Day of week for PkDate (Monday, Tuesday, Wednesday)"), ("PeakT", "Peak dry-bulb temperature in the 72-hour period (°C)"), ("PeakT_F", "Peak dry-bulb temperature (°F)"), ("AvgT72", "Average dry-bulb temperature over the 72-hour period (°C)"), ("AvgT72_F", "Average dry-bulb over 72 hours (°F)"), ("AvgT_noon_6", "Average dry-bulb from noon to 6 PM over the 3 days (°C)"), ("AvgT_noon_6_F", "Average dry-bulb noon-6 PM (°F)"), ("TotalScore", "Equal-weight index: PeakT + AvgT72 + AvgT_noon_6 (°C)"), ("TotalScore_F", "Same index in °F (PeakT_F + AvgT72_F + AvgT_noon_6_F)"), ] # Observed holidays in 2009 (from DEER/Title 24 2009 calendar) CA_HOLIDAYS_2009 = [ datetime(2009, 1, 1), # New Year's Day datetime(2009, 1, 19), # Martin Luther King Day datetime(2009, 2, 16), # Presidents Day datetime(2009, 5, 25), # Memorial Day datetime(2009, 7, 3), # July 4 observed (July 4 was Saturday) datetime(2009, 9, 7), # Labor Day datetime(2009, 10, 12), # Columbus Day datetime(2009, 11, 11), # Veterans Day datetime(2009, 11, 26), # Thanksgiving Day datetime(2009, 12, 25), # Christmas ] # CZ2025 station mapping: BldgLoc -> EPW filename (must match WEATHER_DIR contents) CZ2025_EPW_FILES = { "CZ01": "CA_CZ01_ARCATA-AP_725945_CZ2025.epw", "CZ02": "CA_CZ02_SANTA-ROSA(AWOS)_724957_CZ2025.epw", "CZ03": "CA_CZ03_OAKLAND-METRO-AP_724930_CZ2025.epw", "CZ04": "CA_CZ04_PASO-ROBLES-MUNI-AP_723965_CZ2025.epw", "CZ05": "CA_CZ05_SANTA-MARIA-PUBLIC-AP_723940_CZ2025.epw", "CZ06": "CA_CZ06_LOS-ANGELES-IAP_722950_CZ2025.epw", "CZ07": "CA_CZ07_SAN-DIEGO-LINDBERGH-FIELD_722900_CZ2025.epw", "CZ08": "CA_CZ08_FULLERTON-MUNI-AP_722976_CZ2025.epw", "CZ09": "CA_CZ09_BURBANK-GLNDLE-PASAD-AP_722880_CZ2025.epw", "CZ10": "CA_CZ10_RIVERSIDE-MUNI_722869_CZ2025.epw", "CZ11": "CA_CZ11_RED-BLUFF-MUNI-AP_725910_CZ2025.epw", "CZ12": "CA_CZ12_SACRAMENTO-EXECUTIVE-AP_724830_CZ2025.epw", "CZ13": "CA_CZ13_FRESNO-YOSEMITE-IAP_723890_CZ2025.epw", "CZ14": "CA_CZ14_PALMDALE-AP_723820_CZ2025.epw", "CZ15": "CA_CZ15_PALM-SPRINGS-IAP_722868_CZ2025.epw", "CZ16": "CA_CZ16_BLUE-CANYON-AP_725845_CZ2025.epw", } EPW_COLUMNS = [ "year", "month", "day", "hour", "minute", "data_source", "dry_bulb_temp", "dew_point_temp", "relative_humidity", "atmospheric_pressure", "extraterrestrial_horizontal_radiation", "extraterrestrial_direct_normal_radiation", "horizontal_infrared_radiation", "global_horizontal_radiation", "direct_normal_radiation", "diffuse_horizontal_radiation", "global_horizontal_illuminance", "direct_normal_illuminance", "diffuse_horizontal_illuminance", "zenith_luminance", "wind_direction", "wind_speed", "total_sky_cover", "opaque_sky_cover", "visibility", "ceiling_height", "present_weather_observation", "present_weather_codes", "precipitable_water", "aerosol_optical_depth", "snow_depth", "days_since_last_snowfall", "albedo", "liquid_precipitation_depth", "liquid_precipitation_quantity", ] def celsius_to_fahrenheit(c): return (c * 9 / 5) + 32 def read_epw_from_path(epw_path): """Read EPW from file path.""" df = pd.read_csv(epw_path, skiprows=8, header=None, names=EPW_COLUMNS) return _epw_to_dataframe(df) def _epw_to_dataframe(df): df["datetime"] = pd.to_datetime( df[["year", "month", "day", "hour"]].astype(int), format="%Y%m%d%H" ) df.set_index("datetime", inplace=True) df.sort_index(inplace=True) return df def is_valid_3day_start_2009(month, day): """ True if (month, day) in 2009 can start a 3-day period: Mon/Tue/Wed and all three days are weekdays and not in CA_HOLIDAYS_2009. """ d = datetime(2009, month, day) if d.weekday() > 2: # only Mon=0, Tue=1, Wed=2 return False for i in range(3): check = d + timedelta(days=i) if check.weekday() > 4 or check in CA_HOLIDAYS_2009: return False return True def get_72h_data(df, start_month, start_day, start_hour=0): """Get 72 consecutive hours from TMY by month/day/hour (year-agnostic).""" cur = datetime(2000, start_month, start_day, start_hour) needed = [] for _ in range(72): needed.append((cur.month, cur.day, cur.hour)) cur += timedelta(hours=1) mask = df.apply( lambda r: (r.name.month, r.name.day, r.name.hour) in needed, axis=1 ) period = df[mask] if len(period) < 72: return None order = {mdh: i for i, mdh in enumerate(needed)} period = period.copy() period["_seq"] = period.apply( lambda r: order.get((r.name.month, r.name.day, r.name.hour), 999), axis=1 ) period = period.sort_values("_seq") return period def score_3day(df, start_month, start_day): """ Equal-weight index = PeakT + AvgT72 + AvgT_noon_6 (all °C). Returns dict with PeakT, AvgT72, AvgT_noon_6, TotalScore, PeakTempHr, PkDayOffset or None. PeakTempHr = hour of day (0-23) when PeakT occurred; PkDayOffset = 0/1/2 (which day in the 3-day window). """ period = get_72h_data(df, start_month, start_day) if period is None: return None avg_t72 = period["dry_bulb_temp"].mean() noon_6 = period[period.index.hour.isin([12, 13, 14, 15, 16, 17])]["dry_bulb_temp"].mean() peak_t = period["dry_bulb_temp"].max() peak_idx = period["dry_bulb_temp"].idxmax() start_norm = period.index.min().normalize() peak_hour = peak_idx.hour day_offset = (peak_idx.normalize() - start_norm).days total = peak_t + avg_t72 + noon_6 return { "PeakT": peak_t, "AvgT72": avg_t72, "AvgT_noon_6": noon_6, "TotalScore": total, "PeakTempHr": peak_hour, "PkDayOffset": day_offset, } def find_peak_3day(df, climate_zone, weather_filename): """Find 3-day period in June–Sept with highest equal-weight index (2009 calendar).""" june_sept = df[(df.index.month >= 6) & (df.index.month <= 9)] dates = june_sept.index.normalize().unique() best = None best_score = -np.inf for dt in dates: m, d = dt.month, dt.day if not is_valid_3day_start_2009(m, d): continue s = score_3day(june_sept, m, d) if s is None or s["TotalScore"] <= best_score: continue best_score = s["TotalScore"] pk_2009 = datetime(2009, m, d) best = { "BldgLoc": climate_zone, "PkDay": pk_2009.timetuple().tm_yday, "PkHr": 0, "PeakTempHr": s["PeakTempHr"], "PkDayOffset": s["PkDayOffset"], "PkDate": f"2009-{m:02d}-{d:02d}", "SourceWeatherSet": weather_filename, "DayOfWeek": pk_2009.strftime("%A"), "PeakT": round(s["PeakT"], 2), "PeakT_F": round(celsius_to_fahrenheit(s["PeakT"]), 2), "AvgT72": round(s["AvgT72"], 2), "AvgT72_F": round(celsius_to_fahrenheit(s["AvgT72"]), 2), "AvgT_noon_6": round(s["AvgT_noon_6"], 2), "AvgT_noon_6_F": round(celsius_to_fahrenheit(s["AvgT_noon_6"]), 2), "TotalScore": round(s["TotalScore"], 2), "TotalScore_F": round( celsius_to_fahrenheit(s["PeakT"]) + celsius_to_fahrenheit(s["AvgT72"]) + celsius_to_fahrenheit(s["AvgT_noon_6"]), 2, ), } return best def main(): print("DEER Peak 3-Day Period Analysis (CZ2025)") print("2009 calendar, full holiday list, equal-weight index (PeakT + AvgT72 + AvgT_noon_6)\n") print(f"Weather folder: {WEATHER_DIR}") print(f"Output: {OUTPUT_CSV}\n") if not os.path.isdir(WEATHER_DIR): print(f"Error: WEATHER_DIR not found: {WEATHER_DIR}") return results = [] for bldg_loc, epw_name in CZ2025_EPW_FILES.items(): path = os.path.join(WEATHER_DIR, epw_name) if not os.path.isfile(path): print(f" {bldg_loc}: file not found {epw_name}") continue try: df = read_epw_from_path(path) except Exception as e: print(f" {bldg_loc}: error reading {epw_name}: {e}") continue row = find_peak_3day(df, bldg_loc, epw_name) if row: results.append(row) print(f" {bldg_loc}: {row['PkDate']} ({row['DayOfWeek']}) Score={row['TotalScore']:.2f}") else: print(f" {bldg_loc}: no valid 3-day period") out = pd.DataFrame(results) with open(OUTPUT_CSV, "w", newline="", encoding="utf-8-sig") as f: f.write("# Data dictionary (column descriptions)\n") for col, desc in CSV_COLUMN_DESCRIPTIONS: f.write(f"# {col}: {desc}\n") f.write("\n") out.to_csv(f, index=False) print(f"\nSaved: {OUTPUT_CSV}") print(out.to_string(index=False)) if __name__ == "__main__": main()