""" Weather Forecast.
This module provides classes and methods to retrieve , manage , and process weather forecast data
from various online sources . It includes structured representations of weather data and utilities
for fetching forecasts for specific locations and time ranges . By integrating multiple data sources ,
the module enables flexible access to weather information based on latitude , longitude , and
desired time periods .
Notes :
- Supported weather sources can be expanded by adding new fetch methods within the
WeatherForecast class .
- Ensure appropriate API keys or configurations are set up if required by external data sources .
"""

import re
from typing import Dict, List, Optional, Tuple

import pandas as pd
import requests
from bs4 import BeautifulSoup

from akkudoktoreos.core.logging import get_logger
from akkudoktoreos.prediction.weatherabc import WeatherDataRecord, WeatherProvider
from akkudoktoreos.utils.cacheutil import cache_in_file
from akkudoktoreos.utils.datetimeutil import to_datetime, to_duration, to_timezone

logger = get_logger(__name__)

WheaterDataClearOutsideMapping: List[Tuple[str, Optional[str], Optional[float]]] = [
    # clearoutside_key, description, corr_factor
    ("DateTime", "DateTime", None),
    ("Total Clouds (% Sky Obscured)", "Total Clouds (% Sky Obscured)", 1),
    ("Low Clouds (% Sky Obscured)", "Low Clouds (% Sky Obscured)", 1),
    ("Medium Clouds (% Sky Obscured)", "Medium Clouds (% Sky Obscured)", 1),
    ("High Clouds (% Sky Obscured)", "High Clouds (% Sky Obscured)", 1),
    ("ISS Passover", None, None),
    ("Visibility (miles)", "Visibility (m)", 1609.34),
    ("Fog (%)", "Fog (%)", 1),
    ("Precipitation Type", "Precipitation Type", None),
    ("Precipitation Probability (%)", "Precipitation Probability (%)", 1),
    ("Precipitation Amount (mm)", "Precipitation Amount (mm)", 1),
    ("Wind Speed (mph)", "Wind Speed (kmph)", 1.60934),
    ("Chance of Frost", "Chance of Frost", None),
    ("Temperature (°C)", "Temperature (°C)", 1),
    ("Feels Like (°C)", "Feels Like (°C)", 1),
    ("Dew Point (°C)", "Dew Point (°C)", 1),
    ("Relative Humidity (%)", "Relative Humidity (%)", 1),
    ("Pressure (mb)", "Pressure (mb)", 1),
    ("Ozone (du)", "Ozone (du)", 1),
    # Extra extraction
    ("Wind Direction (°)", "Wind Direction (°)", 1),
    # Generated from above
    ("Preciptable Water (cm)", "Preciptable Water (cm)", 1),
    ("Global Horizontal Irradiance (W/m2)", "Global Horizontal Irradiance (W/m2)", 1),
    ("Direct Normal Irradiance (W/m2)", "Direct Normal Irradiance (W/m2)", 1),
    ("Diffuse Horizontal Irradiance (W/m2)", "Diffuse Horizontal Irradiance (W/m2)", 1),
]
""" Mapping of ClearOutside weather data keys to WeatherDataRecord field description.
A list of tuples : ( ClearOutside key , field description , correction factor ) .
"""


class WeatherClearOutside(WeatherProvider):
    """Retrieves and processes weather forecast data from ClearOutside.

    WeatherClearOutside is a thread-safe singleton, ensuring only one instance of this
    class is created.

    Attributes:
        prediction_hours (int, optional): The number of hours into the future for which
            predictions are generated.
        prediction_historic_hours (int, optional): The number of past hours for which
            historical data is retained.
        latitude (float, optional): The latitude in degrees, must be within -90 to 90.
        longitude (float, optional): The longitude in degrees, must be within -180 to 180.
        start_datetime (datetime, optional): The starting datetime for predictions,
            defaults to the current datetime if unspecified.
        end_datetime (datetime, computed): The datetime representing the end of the
            prediction range, calculated based on `start_datetime` and `prediction_hours`.
        keep_datetime (datetime, computed): The earliest datetime for retaining historical
            data, calculated based on `start_datetime` and `prediction_historic_hours`.
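
    Example:
        Minimal usage sketch (assumes an initialized EOS configuration that provides
        latitude and longitude, and that the `WeatherProvider` base class exposes an
        `update_data()` method; both are assumptions, not defined in this module):

            provider = WeatherClearOutside()
            provider.update_data()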
"""

    @classmethod
    def provider_id(cls) -> str:
        """Return the unique identifier for the ClearOutside provider."""
        return "ClearOutside"

    @cache_in_file(with_ttl="1 hour")
    def _request_forecast(self) -> requests.Response:
        """Requests the weather forecast from ClearOutside.

        Returns:
            response: Weather forecast request response from ClearOutside.
"""
source = " https://clearoutside.com/forecast "
latitude = round ( self . config . latitude , 2 )
longitude = round ( self . config . longitude , 2 )
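        # Illustrative example: latitude 52.52 and longitude 13.41 produce the request
        # URL https://clearoutside.com/forecast/52.52/13.41?desktop=true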
        response = requests.get(f"{source}/{latitude}/{longitude}?desktop=true")
        response.raise_for_status()  # Raise an error for bad responses
        logger.debug(f"Response from {source}: {response}")
        # We are working on fresh data (no cache); report the update time.
        self.update_datetime = to_datetime(in_timezone=self.config.timezone)
        return response

    def _update_data(self, force_update: Optional[bool] = None) -> None:
        """Scrape weather forecast data from ClearOutside's website.

        This method requests weather forecast data from ClearOutside based on latitude
        and longitude, then processes and structures this data for further use in
        analysis. The forecast data includes a variety of weather parameters such as
        cloud cover, temperature, humidity, visibility, precipitation, wind speed, and
        additional irradiance values calculated using the cloud cover data.

        Raises:
            ValueError: If the HTML structure of ClearOutside's website changes, causing
                extraction issues with forecast dates, timezone, or expected data sections.

        Note:
            - The function partly builds on code from
              https://github.com/davidusb-geek/emhass/blob/master/src/emhass/forecast.py
              (MIT License).
            - Uses `pvlib` to estimate irradiance (GHI, DNI, DHI) based on cloud cover data.

        Workflow:
            1. **Retrieve Web Content**: Uses a helper method to fetch or retrieve cached
               ClearOutside HTML content.
            2. **Extract Forecast Date and Timezone**: Parses the forecast's start and end
               dates and the UTC offset from the "Generated" header.
            3. **Extract Weather Data**: For each day in the 7-day forecast, finds the
               detailed weather parameters and associates values for each hour. Parameters
               include cloud cover, temperature, humidity, visibility, and precipitation
               type, among others.
            4. **Irradiance Calculation**: Calculates irradiance (GHI, DNI, DHI) values
               using cloud cover data and the `pvlib` library.
            5. **Store Data**: Combines all hourly data into `WeatherDataRecord` objects,
               with keys standardized according to `WeatherDataRecord` attributes.
        """
        # Get ClearOutside web content - either from the site or cached
        response = self._request_forecast(force_update=force_update)  # type: ignore

        # Scrape the data
        soup = BeautifulSoup(response.content, "html.parser")

        # Find the generation data
        p_generated = soup.find("h2", string=lambda text: text and text.startswith("Generated:"))
        if not p_generated:
            error_msg = f"Clearoutside schema change. Could not get '<h2>Generated:', got {p_generated} from {str(response.content)}."
            logger.error(error_msg)
            raise ValueError(error_msg)

        # Extract forecast start and end dates
        forecast_pattern = r"Forecast: (\d{2}/\d{2}/\d{2}) to (\d{2}/\d{2}/\d{2})"
        forecast_match = re.search(forecast_pattern, p_generated.get_text())
        if forecast_match:
            forecast_start_date = forecast_match.group(1)
            forecast_end_date = forecast_match.group(2)
        else:
            error_msg = f"Clearoutside schema change. Could not extract forecast start and end dates from {p_generated}."
            logger.error(error_msg)
            raise ValueError(error_msg)

        # Extract timezone offset
        timezone_pattern = r"Timezone: UTC([+-]\d+)\.(\d+)"
        timezone_match = re.search(timezone_pattern, p_generated.get_text())
        if timezone_match:
            hours = int(timezone_match.group(1))
            # Convert the fractional part to minutes; parse it as a decimal fraction so
            # that both ".5" and ".50" yield 30 minutes.
            minutes = int(float("0." + timezone_match.group(2)) * 60)
            # Combine into a decimal UTC offset. The minutes take the sign of the hours,
            # so e.g. UTC-3.50 becomes -3.5 hours.
            sign = -1.0 if hours < 0 else 1.0
            utc_offset = float(hours) + sign * float(minutes) / 60.0
            forecast_timezone = to_timezone(utc_offset=utc_offset)
        else:
            error_msg = "Clearoutside schema change. Could not extract forecast timezone."
            logger.error(error_msg)
            raise ValueError(error_msg)

        forecast_start_datetime = to_datetime(
            forecast_start_date, in_timezone=forecast_timezone, to_maxtime=False
        )
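        # Example (illustrative): a header containing "Forecast: 14/12/24 to 20/12/24"
        # and "Timezone: UTC+1.00" yields a forecast start at the beginning of 14/12/24
        # in a UTC+1 timezone.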

        # Get key mapping from description
        clearoutside_key_mapping: Dict[str, Tuple[Optional[str], Optional[float]]] = {}
        for clearoutside_key, description, corr_factor in WheaterDataClearOutsideMapping:
            if description is None:
                clearoutside_key_mapping[clearoutside_key] = (None, None)
                continue
            weatherdata_key = WeatherDataRecord.key_from_description(description)
            if weatherdata_key is None:
                # Should not happen
                error_msg = f"No WeatherDataRecord key for '{description}'"
                logger.error(error_msg)
                raise ValueError(error_msg)
            clearoutside_key_mapping[clearoutside_key] = (weatherdata_key, corr_factor)

        # Find all paragraphs with id 'day_<x>'. There should be seven.
        p_days = soup.find_all(id=re.compile(r"day_[0-9]"))
        if len(p_days) != 7:
            error_msg = f"Clearoutside schema change. Found {len(p_days)} day tables, expected 7."
            logger.error(error_msg)
            raise ValueError(error_msg)

        # Delete all records that will be newly added
        self.delete_by_datetime(start_datetime=forecast_start_datetime)

        # Collect weather data, looping over all days
        for day, p_day in enumerate(p_days):
            # Within the day_<x> paragraph, find the detail labels
            p_detail_labels = p_day.find_all(class_="fc_detail_label")
            detail_names = [p.get_text() for p in p_detail_labels]

            # Check for schema changes
            if len(detail_names) < 18:
                error_msg = f"Clearoutside schema change. Unexpected number ({len(detail_names)}) of `fc_detail_label`."
                logger.error(error_msg)
                raise ValueError(error_msg)
            for detail_name in detail_names:
                if detail_name not in clearoutside_key_mapping:
                    warning_msg = f"Clearoutside schema change. Unexpected detail name {detail_name}."
                    logger.warning(warning_msg)

            # Find all the paragraphs that are associated with the details.
            # Beware: there is one extra `ul` paragraph before them that is not
            # associated with a detail.
            p_detail_tables = p_day.find_all("ul")
            if len(p_detail_tables) != len(detail_names) + 1:
                error_msg = f"Clearoutside schema change. Unexpected number ({len(p_detail_tables)}) of `ul` for {len(detail_names)} details. Should be one extra only."
                logger.error(error_msg)
                raise ValueError(error_msg)
            p_detail_tables.pop(0)

            # Create clearout data
            clearout_data = {}

            # Replace some detail names that we use differently
            detail_names = [
                s.replace("Wind Speed/Direction (mph)", "Wind Speed (mph)") for s in detail_names
            ]

            # Number of detail values; on the last day this may be less than 24
            detail_values_count = None

            # Add data values
            scrape_detail_names = detail_names.copy()  # do not change the list during iteration!
            for i, detail_name in enumerate(scrape_detail_names):
                p_detail_values = p_detail_tables[i].find_all("li")

                # Assure the number of values fits
                p_detail_values_count = len(p_detail_values)
                if (day == 6 and p_detail_values_count > 24) or (
                    day < 6 and p_detail_values_count != 24
                ):
                    error_msg = f"Clearoutside schema change. Unexpected number ({p_detail_values_count}) of `li` for detail `{detail_name}` data. Should be 24, or less on day 7. Table is `{p_detail_tables[i]}`."
                    logger.error(error_msg)
                    raise ValueError(error_msg)
                if detail_values_count is None:
                    # Remember the detail values count only once
                    detail_values_count = p_detail_values_count
                if p_detail_values_count != detail_values_count:
                    # Value counts for the details differ
                    error_msg = f"Clearoutside schema change. Number ({p_detail_values_count}) of `li` for detail `{detail_name}` data differs from the previous count ({detail_values_count}). Table is `{p_detail_tables[i]}`."
                    logger.error(error_msg)
                    raise ValueError(error_msg)

                # Scrape the detail values
                detail_data = []
                extra_detail_name = None
                extra_detail_data = []
                for p_detail_value in p_detail_values:
                    if detail_name == "Wind Speed (mph)":
                        # Get the usual value
                        value_str = p_detail_value.get_text()
                        # Also extract the wind direction as extra data; the cell markup
                        # embeds a degree figure such as "270°" (illustrative value).
                        extra_detail_name = "Wind Direction (°)"
                        extra_value = None
                        match = re.search(r"(\d+)°", str(p_detail_value))
                        if match:
                            extra_value = float(match.group(1))
                        else:
                            error_msg = f"Clearoutside schema change. Can't extract direction angle from `{p_detail_value}` for detail `{extra_detail_name}`. Table is `{p_detail_tables[i]}`."
                            logger.error(error_msg)
                            raise ValueError(error_msg)
                        extra_detail_data.append(extra_value)
                    elif (
                        detail_name in ("Precipitation Type", "Chance of Frost")
                        and hasattr(p_detail_value, "title")
                        and p_detail_value.title
                    ):
                        # These details encode their value in the cell's `title` tag
                        value_str = p_detail_value.title.string
                    else:
                        value_str = p_detail_value.get_text()
                    try:
                        value = float(value_str)
                    except ValueError:
                        # Non-numeric values (e.g. a precipitation type) stay as strings
                        value = value_str
                    detail_data.append(value)
                clearout_data[detail_name] = detail_data
                if extra_detail_name:
                    if extra_detail_name not in detail_names:
                        detail_names.append(extra_detail_name)
                    clearout_data[extra_detail_name] = extra_detail_data
                    logger.debug(f"Added extra data {extra_detail_name} with {extra_detail_data}")

            # Add datetimes of the scraped data
            clearout_data["DateTime"] = [
                forecast_start_datetime + to_duration(f"{day} days {i} hours")
                for i in range(0, detail_values_count)  # type: ignore[arg-type]
            ]
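            # Example (illustrative): day=1 and i=13 yield
            # forecast_start_datetime + "1 days 13 hours", i.e. 13:00 local time on the
            # second forecast day.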
            detail_names.append("DateTime")

            # Convert the cloud cover into irradiance (GHI, DNI, DHI)
            cloud_cover = pd.Series(
                data=clearout_data["Total Clouds (% Sky Obscured)"], index=clearout_data["DateTime"]
            )
            ghi, dni, dhi = self.estimate_irradiance_from_cloud_cover(
                self.config.latitude, self.config.longitude, cloud_cover
            )

            # Add GHI, DNI, DHI to clearout data
            clearout_data["Global Horizontal Irradiance (W/m2)"] = ghi
            detail_names.append("Global Horizontal Irradiance (W/m2)")
            clearout_data["Direct Normal Irradiance (W/m2)"] = dni
            detail_names.append("Direct Normal Irradiance (W/m2)")
            clearout_data["Diffuse Horizontal Irradiance (W/m2)"] = dhi
            detail_names.append("Diffuse Horizontal Irradiance (W/m2)")

            # Add precipitable water (PWAT) via a pvlib-based helper.
            clearout_data["Preciptable Water (cm)"] = self.estimate_preciptable_water(
                pd.Series(data=clearout_data["Temperature (°C)"]),
                pd.Series(data=clearout_data["Relative Humidity (%)"]),
            ).to_list()
            detail_names.append("Preciptable Water (cm)")

            # Add the weather records scraped from clearout
            for row_index in range(0, len(clearout_data["DateTime"])):
                weather_record = WeatherDataRecord()
                for detail_name in detail_names:
                    key = clearoutside_key_mapping[detail_name][0]
                    if key is None:
                        continue
                    if detail_name in clearout_data:
                        value = clearout_data[detail_name][row_index]
                        corr_factor = clearoutside_key_mapping[detail_name][1]
                        if corr_factor:
                            value = value * corr_factor
                        setattr(weather_record, key, value)
                self.insert_by_datetime(weather_record)