# EOS/modules/class_load_corrector.py

import json,sys, os
from datetime import datetime, timedelta, timezone
import numpy as np
from pprint import pprint
import pandas as pd
import matplotlib.pyplot as plt
# from sklearn.model_selection import train_test_split, GridSearchCV
# from sklearn.ensemble import GradientBoostingRegressor
# from xgboost import XGBRegressor
# from statsmodels.tsa.statespace.sarimax import SARIMAX
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, LSTM
# from tensorflow.keras.optimizers import Adam
# from sklearn.preprocessing import MinMaxScaler
# from sklearn.metrics import mean_squared_error, r2_score
import mariadb
# from sqlalchemy import create_engine
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
# Add the parent directory to sys.path so the imports below can be resolved
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import *
from modules.class_load import *
class LoadPredictionAdjuster:
    """Correct a load forecast using its recent, hour-of-day specific error.

    The measured load (column ``'Last'``) and the forecast load (column
    ``'Last Pred'``) are joined on their ``'time'`` column.  For every hour
    of the day a weighted mean of ``Last - Last Pred`` is computed,
    separately for weekdays (``DayOfWeek < 5``) and weekends, and added to
    the forecast.

    Args:
        measured_data: DataFrame with the columns ``'time'`` and ``'Last'``.
        predicted_data: DataFrame with the columns ``'time'`` and
            ``'Last Pred'``.
        load_forecast: Object providing ``get_daily_stats(date_str)``; the
            first element of its result is indexed by hour of day.

    Both frames are copied, so the caller's data is never modified.
    """

    def __init__(self, measured_data, predicted_data, load_forecast):
        # _merge_data() rewrites the 'time' columns.  Working on copies keeps
        # the caller's frames intact and prevents a second adjuster built
        # from the same frames from shifting the timestamps a second time.
        self.measured_data = measured_data.copy()
        self.predicted_data = predicted_data.copy()
        self.load_forecast = load_forecast
        self.merged_data = self._merge_data()
        self.train_data = None
        self.test_data = None
        self.weekday_diff = None
        self.weekend_diff = None

    def _remove_outliers(self, data, threshold=2):
        """Return a copy of ``data`` without rows whose ``'Last'`` z-score
        is ``>= threshold``.

        ``data`` itself is left untouched.  If the standard deviation is zero
        or undefined (constant load, fewer than two rows) no z-score can be
        computed and all rows are kept.
        """
        load = data['Last']
        std = load.std()
        if not std > 0:  # covers both 0 and NaN
            return data.copy()
        z_score = ((load - load.mean()) / std).abs()
        return data[z_score < threshold].copy()

    @staticmethod
    def _to_local_naive(times, timezone='Europe/Berlin'):
        """Convert ``times`` to tz-naive wall-clock time in ``timezone``.

        tz-naive input is interpreted as UTC; tz-aware input is converted
        from whatever zone it carries.
        """
        times = pd.to_datetime(times)
        if times.dt.tz is None:
            times = times.dt.tz_localize('UTC')
        return times.dt.tz_convert(timezone).dt.tz_localize(None)

    def _merge_data(self):
        """Inner-join measured and predicted data on ``'time'``.

        Both time columns are normalised to tz-naive Europe/Berlin time
        first.  The result gets the helper columns ``'Hour'`` and
        ``'DayOfWeek'``.
        """
        self.predicted_data['time'] = self._to_local_naive(self.predicted_data['time'])
        self.measured_data['time'] = self._to_local_naive(self.measured_data['time'])
        merged_data = pd.merge(self.measured_data, self.predicted_data, on='time', how='inner')
        print(merged_data)
        merged_data['Hour'] = merged_data['time'].dt.hour
        merged_data['DayOfWeek'] = merged_data['time'].dt.dayofweek
        return merged_data

    def calculate_weighted_mean(self, train_period_weeks=9, test_period_weeks=1):
        """Split the merged data and compute the hourly correction terms.

        The last ``test_period_weeks`` weeks form ``self.test_data``, the
        ``train_period_weeks`` weeks before them form ``self.train_data``.
        The results are stored in ``self.weekday_diff`` and
        ``self.weekend_diff`` (Series indexed by hour of day).
        """
        self.merged_data = self._remove_outliers(self.merged_data)
        times = self.merged_data['time']
        train_end_date = times.max() - pd.Timedelta(weeks=test_period_weeks)
        train_start_date = train_end_date - pd.Timedelta(weeks=train_period_weeks)
        test_start_date = train_end_date + pd.Timedelta(hours=1)
        test_end_date = test_start_date + pd.Timedelta(weeks=test_period_weeks) - pd.Timedelta(hours=1)
        # Copies, because further columns are assigned to both frames later.
        in_train = (times >= train_start_date) & (times <= train_end_date)
        in_test = (times >= test_start_date) & (times <= test_end_date)
        self.train_data = self.merged_data[in_train].copy()
        self.test_data = self.merged_data[in_test].copy()
        self.train_data['Difference'] = self.train_data['Last'] - self.train_data['Last Pred']
        weekdays_train_data = self.train_data[self.train_data['DayOfWeek'] < 5]
        weekends_train_data = self.train_data[self.train_data['DayOfWeek'] >= 5]
        self.weekday_diff = self._hourly_weighted_diff(weekdays_train_data)
        self.weekend_diff = self._hourly_weighted_diff(weekends_train_data)

    def _hourly_weighted_diff(self, data):
        """Return the weighted mean difference per hour as a float Series.

        Hours without a defined mean are omitted.  The result is always a
        Series, also for empty input.
        """
        diffs = {hour: self._weighted_mean_diff(group) for hour, group in data.groupby('Hour')}
        return pd.Series(diffs, dtype=float).rename_axis('Hour').dropna()

    def _weighted_mean_diff(self, data):
        """Return the mean of ``data['Difference']`` weighted by 1 / age in days.

        The age is measured against the newest training timestamp.  Returns
        NaN when no row carries a usable weight.
        """
        train_end_date = self.train_data['time'].max()
        age_days = (train_end_date - data['time']).dt.days
        # NOTE(review): rows younger than one full day get no weight at all
        # (0 days -> NaN), so the newest day never contributes.  Kept as is
        # to leave the model output unchanged - confirm this is intended.
        weights = 1 / age_days.replace(0, np.nan)
        # A missing difference must not contribute to the denominator either.
        weights = weights.where(data['Difference'].notna())
        weight_sum = weights.sum()
        if weight_sum == 0:
            return np.nan
        return (data['Difference'] * weights).sum() / weight_sum

    def adjust_predictions(self):
        """Add the column ``'Adjusted Pred'`` to the training and test data."""
        self.train_data['Adjusted Pred'] = self._adjusted_prediction(self.train_data)
        self.test_data['Adjusted Pred'] = self._adjusted_prediction(self.test_data)

    def _adjusted_prediction(self, data):
        """Vectorised form of ``_adjust_row`` for a whole frame.

        Unlike a row-wise ``apply`` this also works for empty frames.
        """
        hours = data['Hour']
        weekday_offset = hours.map(self.weekday_diff)
        weekend_offset = hours.map(self.weekend_diff)
        # Hours without a learned correction are left unadjusted (offset 0).
        offset = weekday_offset.where(data['DayOfWeek'] < 5, weekend_offset).fillna(0)
        return data['Last Pred'] + offset

    def _adjust_row(self, row):
        """Return ``row['Last Pred']`` plus the correction for its hour.

        Hours without a learned correction are left unadjusted.
        """
        if row['DayOfWeek'] < 5:
            return row['Last Pred'] + self.weekday_diff.get(row['Hour'], 0)
        else:
            return row['Last Pred'] + self.weekend_diff.get(row['Hour'], 0)

    def plot_results(self):
        """Plot measured, predicted and adjusted load for both data sets."""
        self._plot_data(self.train_data, 'Training')
        self._plot_data(self.test_data, 'Testing')

    def _plot_data(self, data, data_type):
        """Show one figure comparing the three load curves of ``data``."""
        plt.figure(figsize=(14, 7))
        plt.plot(data['time'], data['Last'], label=f'Actual Last - {data_type}', color='blue')
        plt.plot(data['time'], data['Last Pred'], label=f'Predicted Last - {data_type}', color='red', linestyle='--')
        plt.plot(data['time'], data['Adjusted Pred'], label=f'Adjusted Predicted Last - {data_type}', color='green', linestyle=':')
        plt.xlabel('Time')
        plt.ylabel('Load')
        plt.title(f'Actual vs Predicted vs Adjusted Predicted Load ({data_type} Data)')
        plt.legend()
        plt.grid(True)
        plt.show()

    def evaluate_model(self):
        """Print and return MSE and R-squared of the adjusted test prediction.

        Returns:
            Tuple ``(mse, r2)``.
        """
        mse = mean_squared_error(self.test_data['Last'], self.test_data['Adjusted Pred'])
        r2 = r2_score(self.test_data['Last'], self.test_data['Adjusted Pred'])
        print(f'Mean Squared Error: {mse}')
        print(f'R-squared: {r2}')
        return mse, r2

    def predict_next_hours(self, hours_ahead):
        """Return the adjusted forecast for the hours after the merged data.

        Args:
            hours_ahead: Number of hourly steps to predict; values ``<= 0``
                yield an empty frame.

        Returns:
            DataFrame with the columns ``'time'``, ``'Hour'``,
            ``'DayOfWeek'``, ``'Last Pred'`` and ``'Adjusted Pred'``.

        Raises:
            ValueError: If the merged data contains no timestamp.
        """
        last_date = self.merged_data['time'].max()
        if pd.isna(last_date):
            raise ValueError('merged data contains no timestamps to continue from')
        # Built as a DatetimeIndex so the column is datetime-typed even when
        # no hours are requested.
        offsets = pd.to_timedelta(np.arange(1, hours_ahead + 1), unit='h')
        future_df = pd.DataFrame({'time': last_date + offsets})
        future_df['Hour'] = future_df['time'].dt.hour
        future_df['DayOfWeek'] = future_df['time'].dt.dayofweek
        daily_cache = {}  # one get_daily_stats() call per calendar day
        future_df['Last Pred'] = pd.Series(
            [self._forecast_next_hours(timestamp, daily_cache) for timestamp in future_df['time']],
            index=future_df.index,
            dtype=float,
        )
        future_df['Adjusted Pred'] = self._adjusted_prediction(future_df)
        return future_df

    def _forecast_next_hours(self, timestamp, daily_cache=None):
        """Return the unadjusted forecast value for ``timestamp``.

        Args:
            timestamp: Point in time to look up.
            daily_cache: Optional dict used to remember the daily statistics
                per date string across calls.

        Returns:
            The value for the timestamp's hour, or NaN if the daily forecast
            has no entry for that hour.
        """
        date_str = timestamp.strftime('%Y-%m-%d')
        if daily_cache is None:
            daily_forecast = self.load_forecast.get_daily_stats(date_str)
        else:
            if date_str not in daily_cache:
                daily_cache[date_str] = self.load_forecast.get_daily_stats(date_str)
            daily_forecast = daily_cache[date_str]
        hourly_values = daily_forecast[0]
        hour = timestamp.hour
        return hourly_values[hour] if hour < len(hourly_values) else np.nan
# if __name__ == '__main__':
# estimator = LastEstimator()
# start_date = "2024-06-01"
# end_date = "2024-08-01"
# last_df = estimator.get_last(start_date, end_date)
# selected_columns = last_df[['timestamp', 'Last']]
# selected_columns['time'] = pd.to_datetime(selected_columns['timestamp']).dt.floor('H')
# selected_columns['Last'] = pd.to_numeric(selected_columns['Last'], errors='coerce')
# # Drop rows with NaN values
# cleaned_data = selected_columns.dropna()
# print(cleaned_data)
# # Create an instance of LoadForecast
# lf = LoadForecast(filepath=r'.\load_profiles.npz', year_energy=6000*1000)
# # Initialize an empty DataFrame to hold the forecast data
# forecast_list = []
# # Loop through each day in the date range
# for single_date in pd.date_range(cleaned_data['time'].min().date(), cleaned_data['time'].max().date()):
# date_str = single_date.strftime('%Y-%m-%d')
# daily_forecast = lf.get_daily_stats(date_str)
# mean_values = daily_forecast[0] # Extract the mean values
# hours = [single_date + pd.Timedelta(hours=i) for i in range(24)]
# daily_forecast_df = pd.DataFrame({'time': hours, 'Last Pred': mean_values})
# forecast_list.append(daily_forecast_df)
# # Concatenate all daily forecasts into a single DataFrame
# forecast_df = pd.concat(forecast_list, ignore_index=True)
# # Create an instance of the LoadPredictionAdjuster class
# adjuster = LoadPredictionAdjuster(cleaned_data, forecast_df, lf)
# # Calculate the weighted mean differences
# adjuster.calculate_weighted_mean()
# # Adjust the predictions
# adjuster.adjust_predictions()
# # Plot the results
# adjuster.plot_results()
# # Evaluate the model
# adjuster.evaluate_model()
# # Predict the next x hours
# future_predictions = adjuster.predict_next_hours(48)
# print(future_predictions)