Update class_load_corrector.py

Initial cleanup: translated comments to English; cleaned up and sorted the imports.
This commit is contained in:
NormannK 2024-09-20 13:01:34 +02:00 committed by Andreas
parent 8075515e8f
commit 2df409695a

View File

@ -1,9 +1,14 @@
import json,sys, os import json
import os
import sys
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
import numpy as np import numpy as np
from pprint import pprint
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
import mariadb
# from sklearn.model_selection import train_test_split, GridSearchCV # from sklearn.model_selection import train_test_split, GridSearchCV
# from sklearn.ensemble import GradientBoostingRegressor # from sklearn.ensemble import GradientBoostingRegressor
# from xgboost import XGBRegressor # from xgboost import XGBRegressor
@ -12,17 +17,14 @@ import matplotlib.pyplot as plt
# from tensorflow.keras.layers import Dense, LSTM # from tensorflow.keras.layers import Dense, LSTM
# from tensorflow.keras.optimizers import Adam # from tensorflow.keras.optimizers import Adam
# from sklearn.preprocessing import MinMaxScaler # from sklearn.preprocessing import MinMaxScaler
# from sklearn.metrics import mean_squared_error, r2_score
import mariadb
# from sqlalchemy import create_engine # from sqlalchemy import create_engine
import numpy as np
import matplotlib.pyplot as plt # Add the parent directory to sys.path
from sklearn.metrics import mean_squared_error, r2_score
# Fügen Sie den übergeordneten Pfad zum sys.path hinzu
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import * from config import *
from modules.class_load import * from modules.class_load import *
class LoadPredictionAdjuster: class LoadPredictionAdjuster:
def __init__(self, measured_data, predicted_data, load_forecast): def __init__(self, measured_data, predicted_data, load_forecast):
self.measured_data = measured_data self.measured_data = measured_data
@ -34,41 +36,39 @@ class LoadPredictionAdjuster:
self.weekday_diff = None self.weekday_diff = None
self.weekend_diff = None self.weekend_diff = None
def _remove_outliers(self, data, threshold=2): def _remove_outliers(self, data, threshold=2):
# Berechne den Z-Score der 'Last'-Daten # Calculate the Z-Score of the 'Last' data
data['Z-Score'] = np.abs((data['Last'] - data['Last'].mean()) / data['Last'].std()) data['Z-Score'] = np.abs((data['Last'] - data['Last'].mean()) / data['Last'].std())
# Filtere die Daten nach dem Schwellenwert # Filter the data based on the threshold
filtered_data = data[data['Z-Score'] < threshold] filtered_data = data[data['Z-Score'] < threshold]
return filtered_data.drop(columns=['Z-Score']) return filtered_data.drop(columns=['Z-Score'])
def _merge_data(self): def _merge_data(self):
# Konvertiere die Zeitspalte in beiden Datenrahmen zu datetime # Convert the time column in both DataFrames to datetime
self.predicted_data['time'] = pd.to_datetime(self.predicted_data['time']) self.predicted_data['time'] = pd.to_datetime(self.predicted_data['time'])
self.measured_data['time'] = pd.to_datetime(self.measured_data['time']) self.measured_data['time'] = pd.to_datetime(self.measured_data['time'])
# Stelle sicher, dass beide Zeitspalten dieselbe Zeitzone haben # Ensure both time columns have the same timezone
# Measured Data: Setze die Zeitzone auf UTC, falls es tz-naiv ist
if self.measured_data['time'].dt.tz is None: if self.measured_data['time'].dt.tz is None:
self.measured_data['time'] = self.measured_data['time'].dt.tz_localize('UTC') self.measured_data['time'] = self.measured_data['time'].dt.tz_localize('UTC')
# Predicted Data: Setze ebenfalls UTC und konvertiere anschließend in die lokale Zeitzone self.predicted_data['time'] = (
self.predicted_data['time'] = self.predicted_data['time'].dt.tz_localize('UTC').dt.tz_convert('Europe/Berlin') self.predicted_data['time'].dt.tz_localize('UTC')
.dt.tz_convert('Europe/Berlin')
)
self.measured_data['time'] = self.measured_data['time'].dt.tz_convert('Europe/Berlin') self.measured_data['time'] = self.measured_data['time'].dt.tz_convert('Europe/Berlin')
# Optional: Entferne die Zeitzoneninformation, wenn du nur lokal arbeiten möchtest # Optionally: Remove timezone information if only working locally
self.predicted_data['time'] = self.predicted_data['time'].dt.tz_localize(None) self.predicted_data['time'] = self.predicted_data['time'].dt.tz_localize(None)
self.measured_data['time'] = self.measured_data['time'].dt.tz_localize(None) self.measured_data['time'] = self.measured_data['time'].dt.tz_localize(None)
# Jetzt kannst du den Merge durchführen # Now you can perform the merge
merged_data = pd.merge(self.measured_data, self.predicted_data, on='time', how='inner') merged_data = pd.merge(self.measured_data, self.predicted_data, on='time', how='inner')
print(merged_data) print(merged_data)
merged_data['Hour'] = merged_data['time'].dt.hour merged_data['Hour'] = merged_data['time'].dt.hour
merged_data['DayOfWeek'] = merged_data['time'].dt.dayofweek merged_data['DayOfWeek'] = merged_data['time'].dt.dayofweek
return merged_data return merged_data
def calculate_weighted_mean(self, train_period_weeks=9, test_period_weeks=1): def calculate_weighted_mean(self, train_period_weeks=9, test_period_weeks=1):
self.merged_data = self._remove_outliers(self.merged_data) self.merged_data = self._remove_outliers(self.merged_data)
train_end_date = self.merged_data['time'].max() - pd.Timedelta(weeks=test_period_weeks) train_end_date = self.merged_data['time'].max() - pd.Timedelta(weeks=test_period_weeks)
@ -77,8 +77,15 @@ class LoadPredictionAdjuster:
test_start_date = train_end_date + pd.Timedelta(hours=1) test_start_date = train_end_date + pd.Timedelta(hours=1)
test_end_date = test_start_date + pd.Timedelta(weeks=test_period_weeks) - pd.Timedelta(hours=1) test_end_date = test_start_date + pd.Timedelta(weeks=test_period_weeks) - pd.Timedelta(hours=1)
self.train_data = self.merged_data[(self.merged_data['time'] >= train_start_date) & (self.merged_data['time'] <= train_end_date)] self.train_data = self.merged_data[
self.test_data = self.merged_data[(self.merged_data['time'] >= test_start_date) & (self.merged_data['time'] <= test_end_date)] (self.merged_data['time'] >= train_start_date) &
(self.merged_data['time'] <= train_end_date)
]
self.test_data = self.merged_data[
(self.merged_data['time'] >= test_start_date) &
(self.merged_data['time'] <= test_end_date)
]
self.train_data['Difference'] = self.train_data['Last'] - self.train_data['Last Pred'] self.train_data['Difference'] = self.train_data['Last'] - self.train_data['Last Pred']
@ -142,67 +149,53 @@ class LoadPredictionAdjuster:
daily_forecast = self.load_forecast.get_daily_stats(date_str) daily_forecast = self.load_forecast.get_daily_stats(date_str)
return daily_forecast[0][hour] if hour < len(daily_forecast[0]) else np.nan return daily_forecast[0][hour] if hour < len(daily_forecast[0]) else np.nan
# if __name__ == '__main__': # if __name__ == '__main__':
# estimator = LastEstimator()
# start_date = "2024-06-01"
# end_date = "2024-08-01"
# last_df = estimator.get_last(start_date, end_date)
# selected_columns = last_df[['timestamp', 'Last']]
# selected_columns['time'] = pd.to_datetime(selected_columns['timestamp']).dt.floor('H')
# selected_columns['Last'] = pd.to_numeric(selected_columns['Last'], errors='coerce')
# estimator = LastEstimator() # # Drop rows with NaN values
# start_date = "2024-06-01" # cleaned_data = selected_columns.dropna()
# end_date = "2024-08-01"
# last_df = estimator.get_last(start_date, end_date)
# selected_columns = last_df[['timestamp', 'Last']] # print(cleaned_data)
# selected_columns['time'] = pd.to_datetime(selected_columns['timestamp']).dt.floor('H') # # Create an instance of LoadForecast
# selected_columns['Last'] = pd.to_numeric(selected_columns['Last'], errors='coerce') # lf = LoadForecast(filepath=r'.\load_profiles.npz', year_energy=6000*1000)
# # Drop rows with NaN values # # Initialize an empty DataFrame to hold the forecast data
# cleaned_data = selected_columns.dropna() # forecast_list = []
# print(cleaned_data) # # Loop through each day in the date range
# # Create an instance of LoadForecast # for single_date in pd.date_range(cleaned_data['time'].min().date(), cleaned_data['time'].max().date()):
# date_str = single_date.strftime('%Y-%m-%d')
# daily_forecast = lf.get_daily_stats(date_str)
# mean_values = daily_forecast[0] # Extract the mean values
# hours = [single_date + pd.Timedelta(hours=i) for i in range(24)]
# daily_forecast_df = pd.DataFrame({'time': hours, 'Last Pred': mean_values})
# forecast_list.append(daily_forecast_df)
# lf = LoadForecast(filepath=r'.\load_profiles.npz', year_energy=6000*1000) # # Concatenate all daily forecasts into a single DataFrame
# forecast_df = pd.concat(forecast_list, ignore_index=True)
# # Initialize an empty DataFrame to hold the forecast data # # Create an instance of the LoadPredictionAdjuster class
# forecast_list = [] # adjuster = LoadPredictionAdjuster(cleaned_data, forecast_df, lf)
# # Loop through each day in the date range # # Calculate the weighted mean differences
# for single_date in pd.date_range(cleaned_data['time'].min().date(), cleaned_data['time'].max().date()): # adjuster.calculate_weighted_mean()
# date_str = single_date.strftime('%Y-%m-%d')
# daily_forecast = lf.get_daily_stats(date_str)
# mean_values = daily_forecast[0] # Extract the mean values
# hours = [single_date + pd.Timedelta(hours=i) for i in range(24)]
# daily_forecast_df = pd.DataFrame({'time': hours, 'Last Pred': mean_values})
# forecast_list.append(daily_forecast_df)
# # Concatenate all daily forecasts into a single DataFrame # # Adjust the predictions
# forecast_df = pd.concat(forecast_list, ignore_index=True) # adjuster.adjust_predictions()
# # Create an instance of the LoadPredictionAdjuster class # # Plot the results
# adjuster = LoadPredictionAdjuster(cleaned_data, forecast_df, lf) # adjuster.plot_results()
# # Calculate the weighted mean differences # # Evaluate the model
# adjuster.calculate_weighted_mean() # adjuster.evaluate_model()
# # Adjust the predictions # # Predict the next x hours
# adjuster.adjust_predictions() # future_predictions = adjuster.predict_next_hours(48)
# print(future_predictions)
# # Plot the results
# adjuster.plot_results()
# # Evaluate the model
# adjuster.evaluate_model()
# # Predict the next x hours
# future_predictions = adjuster.predict_next_hours(48)
# print(future_predictions)