"""Module defining utility functions for managing model directories and instances.
This module provides functions to create model directories with template configuration
files, transfer setup information from Excel files to configuration files, and save/load
model instances. It leverages libraries such as `dill` for serialization and `pandas`
for data manipulation.
"""
from typing import Optional
import dill
import pandas as pd
from pathlib import Path
from cvxlab.backend.model import Model
from cvxlab.defaults import Defaults
from cvxlab.log_exc.logger import Logger
from cvxlab.support import util
from cvxlab.support.file_manager import FileManager
files = FileManager(Logger())
[docs]
def create_model_dir(
model_dir_name: str = 'model',
main_dir_path: Optional[str] = None,
force_overwrite: bool = False,
settings_file_type: Defaults.LiteralTypes.SettingsSource = 'yml',
include_user_defined_templates: bool = False,
) -> None:
"""Create a model directory with template configuration files.
This function creates a new model directory with the specified name under
the given main directory path. It can generate template configuration files
in either YAML or Excel format. If the directory already exists, it can be
overwritten based on user confirmation or the force_overwrite flag. Optionally,
supplementary files can be copied into the new model directory (user defined
operators/constants functions template).
Args:
model_dir_name (str): The name of the model directory to create. Defaults
to 'model'.
main_dir_path (str): The path to the main directory where the model
directory will be created. If None, the current working directory will
be used. Defaults to None.
force_overwrite (bool, optional): If True, overwrite existing directory
without confirmation. Defaults to False.
settings_file_type (Defaults.LiteralTypes.SettingsSource, optional): The
type of template configuration file to generate ('yml' or 'xlsx').
Defaults to 'yml'.
include_user_defined_templates (bool, optional): If True, copy user-defined
template files into the new model directory (user-defined operators and
constants templates). Defaults to False.
Raises:
ValueError: If the template file type is unsupported.
"""
structures = Defaults.DefaultStructures
config_files = Defaults.ConfigFiles
if main_dir_path is None:
main_dir_path = str(Path.cwd())
model_dir_path = Path(main_dir_path) / model_dir_name
files.logger.info(
f"Generating model '{model_dir_name}' directory with basic "
f"{settings_file_type} configuration files.")
util.validate_selection(
valid_selections=config_files.AVAILABLE_SETUP_SOURCES,
selection=settings_file_type)
if model_dir_path.exists():
if not files.erase_dir(
dir_path=model_dir_path,
force_erase=force_overwrite):
return
files.create_dir(model_dir_path, force_overwrite)
if settings_file_type == 'yml':
structure_name = config_files.SETUP_INFO
structure_mapping = {
structure_name[0]: structures.SET_STRUCTURE,
structure_name[1]: structures.DATA_TABLE_STRUCTURE,
structure_name[2]: structures.PROBLEM_STRUCTURE,
}
for file_name, structure_template in structure_mapping.items():
_generate_yaml_template(
structure=structure_template[1],
model_dir_path=model_dir_path,
file_name=f"{file_name}.yml",
header=structure_template[0],
)
elif settings_file_type == 'xlsx':
structure_mapping = Defaults.DefaultStructures.XLSX_TEMPLATE_COLUMNS
template_file_name = config_files.SETUP_XLSX_FILE
files.dict_to_excel_headers(
dict_name=structure_mapping,
excel_dir_path=model_dir_path,
excel_file_name=template_file_name,
)
else:
msg = f"Unsupported template file type '{settings_file_type}'."
files.logger.error(msg)
raise ValueError
if include_user_defined_templates:
copy_user_defined_templates(model_dir_path)
def _generate_yaml_template(
structure: dict,
model_dir_path: Path,
file_name: str,
header: str,
) -> None:
"""Generate a YAML template file from a given structure.
Recursively converts a nested dictionary structure into a YAML-formatted
file and saves it in the specified model directory. If the file already
exists, prompts the user for confirmation before overwriting.
Args:
structure (dict): Dictionary defining the structure of the YAML file.
model_dir_path (Path): Path to the model directory where the file
will be created.
file_name (str): Name of the YAML file to create.
header (str): Header comment to include at the top of the file.
"""
file_path = model_dir_path / file_name
if file_path.exists():
if not util.get_user_confirmation(
f"File '{file_name}' already exists. Overwrite?"
):
return
def _convert_to_yaml(
structure: dict,
indent: int,
) -> list[str]:
optional_key = Defaults.DefaultStructures.OPTIONAL
any_key = Defaults.DefaultStructures.ANY
yaml_lines = []
indent_str = ' ' * indent
for key, value in structure.items():
key = '_' if key == any_key else key
if isinstance(value, type):
value = value.__name__
yaml_lines.append(f"{indent_str}{key}: {value}")
elif isinstance(value, dict):
yaml_lines.append(f"{indent_str}{key}: ")
yaml_lines.extend(_convert_to_yaml(value, indent + 1))
elif isinstance(value, tuple):
if all(isinstance(item, type) for item in value):
value = ", ".join([item.__name__ for item in value])
yaml_lines.append(f"{indent_str}{key}: {value}")
elif value[0] is optional_key:
if isinstance(value[1], dict):
yaml_lines.append(f"{indent_str}{key}: # optional ")
yaml_lines.extend(
_convert_to_yaml(value[1], indent + 1))
elif isinstance(value[1], type):
value = f"{value[1].__name__} # optional"
yaml_lines.append(f"{indent_str}{key}: {value}")
return yaml_lines
if header:
indent = 1
yaml_content = [header]
else:
indent = 0
yaml_content = []
yaml_content.extend(_convert_to_yaml(structure, indent))
try:
with open(file_path, 'w') as file:
file.write('\n'.join(yaml_content))
except IOError as e:
raise IOError(f"Error writing to file '{file_name}': {e}") from e
[docs]
def copy_user_defined_templates(path_destination: Optional[Path] = None) -> None:
"""Copy user-defined template files to destination path.
This function copies user-defined template files such as templates for custom
operators and constants to a defined path.
Args:
path_destination (Path): The path to the model directory where the files
will be included.
"""
files_config = {
'custom_operators_template': {
'source_path': Defaults.ConfigFiles.TEMPLATES_DIR_PATH,
'file_name': Defaults.ConfigFiles.CUSTOM_OPERATORS_FILE_NAME,
},
'custom_constants_template': {
'source_path': Defaults.ConfigFiles.TEMPLATES_DIR_PATH,
'file_name': Defaults.ConfigFiles.CUSTOM_CONSTANTS_FILE_NAME,
},
}
if path_destination is None:
path_destination = Path.cwd()
for _, config in files_config.items():
files.copy_file_to_destination(
path_source=config['source_path'],
path_destination=path_destination,
file_name=config['file_name'],
)
files.logger.info(f"File '{config['file_name']}' copied.")
[docs]
def transfer_setup_info_xlsx(
source_file_name: str,
source_dir_path: str | Path,
destination_dir_path: str | Path,
update: Defaults.LiteralTypes.TransferUpdate = 'all',
) -> None:
"""Transfer setup information from an Excel file.
This function transfers setup information from a source Excel file to
existing configuration files in a destination directory. It can update
either the settings file, the sets file, or both, based on user input.
If the destination files already exist, the user is prompted for confirmation
before overwriting them.
This function is useful for complex model settings, in order to avoid direct
modifications in setup files, allows the user to work in a separate Excel file
and then transfer the relevant information to the model's configuration files
before running the model.
Args:
source_file_name (str): The name of the source Excel file.
source_dir_path (str | Path): The directory path of the source file.
destination_dir_path (str | Path): The directory path where the
configuration files are located.
update (Defaults.LiteralTypes.TransferUpdate, optional): Specifies which
files to update: 'settings', 'sets', or 'all'. Defaults to 'all'.
Raises:
FileNotFoundError: If the source file or destination files do not exist.
"""
files = FileManager(Logger())
source_file_path = Path(source_dir_path, source_file_name)
settings_file_name = Defaults.ConfigFiles.SETUP_XLSX_FILE
sets_file_name = Defaults.ConfigFiles.SETS_FILE
target_files = {
'settings': ['settings'],
'sets': ['sets'],
'all': ['settings', 'sets']
}
target_info = {
'settings': {
'destination_file_name': settings_file_name,
'destination_file_path': Path(destination_dir_path, settings_file_name),
'cols_to_drop': ['notes', 'skip'],
'tabs_to_update': list(Defaults.ConfigFiles.SETUP_INFO.values()),
},
'sets': {
'destination_file_name': sets_file_name,
'destination_file_path': Path(destination_dir_path, sets_file_name),
'cols_to_drop': ['id', 'notes'],
},
}
# checks if files exists
missing_files = []
if not source_file_path.exists():
msg = (
f"Source file '{source_file_name}' not found in "
f"'{source_dir_path}'. This file is used to transfer model "
f"settings to the configuration files. Please ensure it exists "
f"in the specified directory."
)
files.logger.error(msg)
raise FileNotFoundError(msg)
for category in target_files[update]:
file_path: Path = target_info[category]['destination_file_path']
if not file_path.exists():
missing_files.append(
target_info[category]['destination_file_name'])
if missing_files:
msg = f"Missing destination file/s: {missing_files}"
files.logger.error(msg)
raise FileNotFoundError(msg)
# loop to export data
for category in target_files[update]:
destination_file_name = target_info[category]['destination_file_name']
cols_to_drop = target_info[category]['cols_to_drop']
if category == 'settings':
tabs_to_update = target_info[category]['tabs_to_update']
elif category == 'sets':
xlsx_file = pd.ExcelFile(source_file_path)
tabs_to_update = [
tab for tab in xlsx_file.sheet_names
if tab.startswith(Defaults.Labels.SET_TABLE_NAME_PREFIX)
]
# confirmation
if not util.get_user_confirmation(
f"File {destination_file_name} already exists. \
Do you want to overwrite it?"
):
files.logger.warning(
f"File '{destination_file_name}' not overwritten.")
continue
# update tabs
for tab in tabs_to_update:
files.logger.info(
f"Updating '{tab}' tab in '{destination_file_name}' file.")
df_dict = files.excel_to_dataframes_dict(
excel_file_name=source_file_name,
excel_file_dir_path=source_dir_path,
sheet_names=[tab],
)
df = df_dict[tab]
if 'skip' in cols_to_drop and 'skip' in df.columns:
df = df[df['skip'].isna()]
df_filtered = df.drop(columns=cols_to_drop, errors='ignore')
files.dataframe_to_excel(
dataframe=df_filtered,
excel_filename=destination_file_name,
excel_dir_path=destination_dir_path,
sheet_name=tab,
force_overwrite=True,
)
[docs]
def handle_model_instance(
action: Defaults.LiteralTypes.HandleModelInstance,
file_name: str,
source_dir_path: str | Path = None,
instance: Model = None,
) -> Model | None:
"""Save or load model instances.
This function allows saving a model instance to a file or loading a model
instance from a file. Model instances are automatically saved in the model's
directory (defined in instance path attribute). When loading, the user
must provide the source directory path.
Args:
action (Defaults.LiteralTypes.HandleModelInstance): The action to perform,
either 'save' or 'load'.
file_name (str): The name of the file to save/load the model instance.
source_dir_path (str | Path, optional): The directory path to load the
model instance from. Required if action is 'load'.
instance (Model, optional): The model instance to save. Required if
action is 'save'.
Returns:
Model | None: The loaded model instance if action is 'load', otherwise None.
Raises:
ValueError: If the action is invalid or required parameters are missing.
ValueError: If the model instance is invalid when saving.
"""
if action not in ['save', 'load']:
raise ValueError("Invalid action. Must be either 'save' or 'load'.")
if action == 'save':
if instance is None:
raise ValueError("Instance must be provided for saving.")
_save_model_instance(instance, file_name)
return None
elif action == 'load':
if source_dir_path is None:
raise ValueError(
"Source directory path must be provided for loading.")
return _load_model_instance(file_name, source_dir_path)
def _save_model_instance(
instance: Model,
file_name: str,
) -> None:
"""Save a model instance to a file.
Args:
instance (Model): The model instance to save.
file_name (str): The name of the file to save the model instance.
Raises:
ValueError: If the model instance is invalid.
ValueError: If the file extension is invalid.
"""
if not isinstance(instance, Model):
raise ValueError(
"Invalid model instance. Must be a valid 'Model' instance.")
if '.' in file_name:
if not file_name.endswith('.pkl'):
raise ValueError(
"Invalid file name. If file extension is provided, it must"
"be '.pkl'."
)
else:
file_name = f"{file_name}.pkl"
files = FileManager(Logger())
model_dir_path = instance.paths['model_dir']
model_name = instance.settings['model_name']
instances_dir = Defaults.ConfigFiles.INSTANCES_DIR
instances_save_path = Path(model_dir_path) / instances_dir
instance_file_path = instances_save_path / file_name
if not instances_save_path.exists():
files.create_dir(instances_save_path)
files.logger.info(
f"Saving model instance '{model_name}' as '{file_name}' in "
f"'{instances_save_path}'.")
erased_instance = True
if instance_file_path.exists():
files.logger.info(f"File '{file_name}' already exists.")
erased_instance = files.erase_file(
dir_path=instances_save_path,
file_name=file_name,
force_erase=False,
confirm=True,
)
if erased_instance:
# cleaning up non-serializable attributes
instance.core.sqltools.connection = None
instance.core.sqltools.cursor = None
with open(instance_file_path, 'wb') as file:
dill.dump(instance, file)
files.logger.info(
f"Model instance '{file_name}' saved.")
else:
files.logger.info(
f"Model instance '{file_name}' NOT overwritten.")
def _load_model_instance(
file_name: str,
source_dir_path: str | Path,
) -> Model:
"""Load a model instance from a file.
Args:
file_name (str): The name of the file to load the model instance from.
source_dir_path (str | Path): The directory path to load the model
instance from.
Raises:
ValueError: If the file extension is invalid.
FileNotFoundError: If the file is not found.
e: If there is an error loading the model instance.
Returns:
Model: _description_
"""
if '.' in file_name:
if not file_name.endswith('.pkl'):
raise ValueError(
"Invalid file name. File name must end with '.pkl'.")
else:
file_name = f"{file_name}.pkl"
files = FileManager(Logger())
file_path = Path(source_dir_path) / file_name
if not file_path.exists():
msg = f"File '{file_name}' not found."
files.logger.error(msg)
raise FileNotFoundError(msg)
files.logger.info(
f"Loading model instance from '{file_name}' in '{source_dir_path}'.")
try:
with open(file_path, 'rb') as file:
model_instance = dill.load(file)
return model_instance
except Exception as e:
files.logger.error(f"Error loading model instance '{file_name}': {e}")
raise e