Source code for cvxlab.defaults

"""Module collecting all fundamental default settings of the package.

Defaults of the package are useful for simplifying renaming package labels,
defining templates for fundamental package objects (sets, data tables, variables, ...)
for validation purposes, defining fundamental numerical settings and template 
text messages.
"""
from typing import Literal, Union

import cvxpy as cp
import numpy as np
import pandas as pd

from cvxlab.support import util_constants, util_operators



[docs]
class Defaults:
    """Centralized repository for default settings of the package.

    Defaults are grouped into meaningful categories (sub-classes) for clarity and 
    ease of access. Supports direct attribute access of default settings using '__getattr__' method.

    Subgroups:

    - LiteralTypes: Shared Literal type aliases for validating settings and type hints.
    - ConfigFiles: Defaults related to configuration and file management.
    - Labels: Standard headers and field names.
    - DefaultStructures: Default structures for data validation.
    - SymbolicDefinitions: Allowed constants and operators for symbolic problem definitions.
    - NumericalSettings: Settings for numerical solvers and tolerances.
    - TextNotes: Text notes and messages for user.

    Usage:: 

        # Attributes of sub-groups can be accessed directly
        Defaults.ConfigFiles.SETUP_FILES
    """

    _SUBGROUPS = []

    class LiteralTypes:
        """Shared Literal type aliases (single source of truth).

        To be used to validate settings and for type hints across the package. 
        Changing these aliases will change the allowed values for settings across 
        the package.
        """
        SettingsSource = Literal['yml', 'xlsx']
        DataFileType = Literal['xlsx', 'csv']
        LogLevel = Literal['info', 'debug', 'warning', 'error']
        LogFormat = Literal['standard', 'detailed']
        NormType = Literal['max_relative', 'max_absolute', 'l1', 'l2', 'linf']
        SetUpdateMode = Literal['all', 'filters', 'aggregations']
        HybridVarType = Literal['endogenous', 'exogenous']
        TransferUpdate = Literal['settings', 'sets', 'all']
        ConvergenceTables = Literal['all_endogenous', 'hybrid_only']
        HandleModelInstance = Literal['save', 'load']
        ExcelEngine = Literal['openpyxl', 'xlsxwriter']
        TableHandling = Literal['update', 'overwrite']

    class ConfigFiles:
        """Defaults related to configuration and file management.

        - SETUP_INFO: Dictionary mapping setup information groups.
        - SETUP_XLSX_FILE: Default name of the setup Excel file.
        - SETS_FILE: Default name of the sets Excel file.
        - AVAILABLE_SETUP_SOURCES: List of possible formats for input data sources.
        - INPUT_DATA_DIR: Default name for input data directory.
        - INPUT_DATA_FILE_NAME: Default name for the input data file (for single input data file).
        - AVAILABLE_DATA_FILES_EXTENSIONS: Default extensions for data file/s.
        - SQLITE_DATABASE_FILE: Default name for the SQLite database file.
        - SQLITE_DATABASE_FILE_BKP: Default name for the SQLite database backup file.
        - SQLITE_DATABASE_FILE_TEST: Default name for the SQLite database test file.
        - INSTANCES_DIR: Default name for the directory containing saved Model instances.

        """

        SETUP_INFO = {
            0: 'structure_sets',
            1: 'structure_variables',
            2: 'problem'
        }
        SETUP_XLSX_FILE = 'model_settings.xlsx'
        SETS_FILE = 'sets.xlsx'
        AVAILABLE_SETUP_SOURCES = ['yml', 'xlsx']
        AVAILABLE_DATA_FILES_EXTENSIONS = ['xlsx', 'csv']
        INPUT_DATA_DIR = 'input_data'
        INPUT_DATA_FILE_NAME = 'input_data'
        SQLITE_DATABASE_FILE = 'database.db'
        SQLITE_DATABASE_FILE_BKP = 'database_bkp.db'
        SQLITE_DATABASE_FILE_TEST = 'database_expected.db'
        TEMPLATES_DIR_PATH = r'cvxlab/templates'
        INSTANCES_DIR = 'instances'
        CUSTOM_OPERATORS_FILE_NAME = 'user_defined_operators.py'
        CUSTOM_CONSTANTS_FILE_NAME = 'user_defined_constants.py'

    class Labels:
        """Default labels and field names for dictionary keys and dataframes.

        Dictionary keys default labels:

        - NAME: key related to the name of the data table object.
        - FILTERS: key related to set and data tables filters.
        - SET: key related to the set key of the set table.
        - DIM: key related to the dimension of the set.
        - AGGREGATIONS: key related to set tables aggregations.
        - CVXPY_VAR: key related to cvxpy variables.
        - COORDINATES_KEY: key related to the coordinates of the data table.
        - VARIABLES_INFO_KEY: key related to the variables information.
        - VALUE_KEY: key related to the type for variable of type constants.
        - BLANK_FILL_KEY: key related to the value used to fill blank.
        - NONNEG_KEY: key related to the non-negativity sign of endogenous variables.

        Dataframe columns default labels:

        - SUB_PROBLEM_KEY: column representing sub-problem keys.
        - FILTER_DICT_KEY: column representing filter dictionaries.
        - PROBLEM: column representing problem names.
        - SCENARIO_COORDINATES: column representing scenario coordinates.
        - PROBLEM_STATUS: column representing problem status.
        - OBJECTIVE: column representing the objective of the problem.
        - EXPRESSIONS: column representing expressions in the problem.
        - RMS_TABLES: column representing label for RMS reporting monitor for 
            integrated problems solving.

        Default field types for SQLite data tables:

        - GENERIC_FIELD_TYPE: default field type for text.
        - VALUES_FIELD: default structure and type for numeric values.
        - ID_FIELD: default structure and type for IDs.

        Default suffixes and prefixes for SQLite tables:

        - SET_TABLE_NAME_PREFIX: prefix for set table names.
        - COLUMN_NAME_SUFFIX: suffix for column names.
        - COLUMN_AGGREGATION_SUFFIX: suffix for aggregation columns.
        """

        NAME = 'name'
        FILTERS = 'filters'
        SET = 'set'
        DIM = 'dim'
        AGGREGATIONS = 'aggregations'
        CVXPY_VAR = 'variable'
        COORDINATES_KEY = 'coordinates'
        VARIABLES_INFO_KEY = 'variables_info'
        VALUE_KEY = 'value'
        BLANK_FILL_KEY = 'blank_fill'
        NONNEG_KEY = 'nonneg'

        SUB_PROBLEM_KEY = 'sub_problem_key'
        FILTER_DICT_KEY = 'filter'
        PROBLEM = 'problem'
        SCENARIO_COORDINATES = 'info'
        PROBLEM_STATUS = 'status'
        OBJECTIVE = 'objective'
        EXPRESSIONS = 'expressions'

        GENERIC_FIELD_TYPE = 'TEXT'
        VALUES_FIELD = {'values': ['values', 'REAL']}
        ID_FIELD = {'id': ['id', 'INTEGER PRIMARY KEY']}

        SET_TABLE_NAME_PREFIX = '_set_'
        COLUMN_NAME_SUFFIX = '_Name'
        COLUMN_AGGREGATION_SUFFIX = '_agg_'

    class DefaultStructures:
        """Default structures for data validation and for generating templates.

        Changing these structures will change the way data is validated and
        how templates are generated.

        SET_STRUCTURE: Structure for defining sets with metadata and filters. 
        Sets are defined as the dimensions of the numerical problem. 

        Properties of a SET:

        - set_key: 
            name of the set, used as name of the set in the SQLite data 
            table. Since SQLite tables names are case insensitive, the set_key
            is case insensitive (e.g., 'e' and 'E' are the same set). This is 
            the sole set information that is required to define a set.
        - description: 
            (Optional) information of the set provided by the modeler.
        - split_problem: 
            (Optional) if the set items defines independent numerical 
            sub-problems. In case more sets are defined, the number of sub-problems
            is equal to the linear combination of the items of the sets.
        - copy_from: 
            (Optional) key of another set to copy the data from. If a set is 
            defined with this key, it will copy the data from the set with
            the key defined in this field, and it is not necessary to provide the
            related data in the sets file.
        - filters: 
            (Optional) dictionary with keys as the filters name and values
            as the list of filter values. Filters are used to define variables 
            as part of data tables.
        - aggregations: 
            (Optional) list of set aggregations useful to aggregate data for 
            visualization. Not recalled in operating with numerical problem.


        DATA_TABLE_STRUCTURE: Structure for defining data tables with variables 
        and their properties. Data tables correspond to SQLite tables and are 
        used to generate variables in the numerical problem. 

        Properties of a DATA TABLE:

        - table_key: 
            name of the data table, used as name of the table in the SQLite data 
            table. Since SQLite tables names are case insensitive, the
            table_key is case insensitive (e.g., 'e' and 'E' are the same table).
        - description: 
            (Optional) information of the data table provided by the modeler.
        - type: 
            type of the data table, can be one of VARIABLE_TYPES or a dictionary 
            with keys as problem name and corresponding values as allowed types.
        - variable_domain: 
            (Optional) domain of the endogenous variable: 'integer' for integer
            variables, 'boolean' for binary {0,1} variables. If not specified,
            the variable is continuous (default: None).
        - coordinates: 
            list of table coordinates (set_key symbols) that define the dimensions 
            of the data table.
        - variables_info: 
            definition of the variables based on the same data table. It is a 
            dictionary with keys as variables names and values as dict with
            variable info. Variables are case sensitive.

            - value: 
                (Optional) ALLOWED_CONSTANTS (only for constants!).
            - blank_fill: 
                (Optional) numerical value that will be used to fill variables
                in case of blank or nan values.
            - nonneg: 
                (Optional) defines whether an endogenous variable is expected to 
                be non-negative. This will result in an implicit non-negativity 
                constraint defined in the model. In case of integrated models,
                this constraint is useful to check if endogenous variables values 
                are exchanged with the correct sign, to avoid numerical 
                inconsistencies. Notes:

                - In case of single-problem setups, all implicit expressions are 
                  added to the problem.
                - In case of multiple problems:

                  - For hybrid variables type, the non negativity constraints 
                    are added to the problem where the variable is defined as
                    endogenous.
                  - For pure endogenous variables, the non negativity constraints
                    are added only if the variable is used in any other problem
                    expressions. In case the variable is not used in any expression,
                    an error is raised (constraints must be explicitly defined in 
                    symbolic problem).

            - set_key: 
                (Optional) dictionary with keys as set_key symbols and values
                defining the dimension and filters for the set.

                - dim: 
                    (Optional) DIMENSIONS (included in 'coordinates').
                - filters: 
                    (Optional) dictionary with keys as the filters key of the set
                    and values as the list of values to filter.


        PROBLEM_STRUCTURE: Structure for defining problems with objectives and 
        expressions.

        Properties of a PROBLEM:

        - problem_key: 
            (Optional) name of the problem, used to define multiple problems
            in the same model. If not defined, the problem is considered as the 
            main problem.
        - objective: 
            (Optional) list of expressions defining the objective of the problem.
            It can be a single expression or a list of expressions. The objective is
            defined as a scalar that can be minimized or maximized. If not defined,
            the problem is solved as a system of equations.
        - expressions: 
            list of expressions defining the problem constraints. Each expression
            can be an equality or inequality. The expressions are defined as a list of
            cvxpy expressions. At least one expression is required to define a problem.
        - description: 
            (Optional) list of strings with the problem description provided
            by the modeler. It is used to provide additional information about the problem.

        XLSX_PIVOT_KEYS and XLSX_TEMPLATE_COLUMNS: Keys and columns used to pivot 
        data for generating Excel template files for sets, variables, and problems.

        ALLOWED_BOOL: Dictionary mapping common boolean representations to Python 
        boolean values. This is used to validate boolean values in the data files.
        """

        OPTIONAL = object()
        ANY = object()
        METADATA = 'description'

        SET_STRUCTURE = (
            'set_key:',
            {
                METADATA: (OPTIONAL, str),
                'split_problem': (OPTIONAL, bool),
                'copy_from': (OPTIONAL, str),
                'filters': (OPTIONAL, {ANY: list}),
                'aggregations': (OPTIONAL, Union[int, str, list]),
            }
        )

        DATA_TABLE_STRUCTURE = (
            'table_key:',
            {
                METADATA: (OPTIONAL, str),
                'type': (str, dict),
                'variable_domain': (OPTIONAL, str),
                'coordinates': (str, list),
                'variables_info': {
                    ANY: {
                        'value': (OPTIONAL, str),
                        'blank_fill': (OPTIONAL, Union[int, float]),
                        'nonneg': (OPTIONAL, bool),
                        ANY: (OPTIONAL, {
                            'dim': (OPTIONAL, str),
                            'filters': (OPTIONAL, dict),
                        })
                    }
                }
            }
        )

        PROBLEM_STRUCTURE = (
            'problem_key: # optional',
            {
                'objective': (OPTIONAL, list),
                'expressions': list,
                METADATA: (OPTIONAL, list),
            }
        )

        XLSX_PIVOT_KEYS = {
            'structure_sets': ('set_key', None),
            'structure_variables': ('table_key', 'variables_info'),
            'problem': ('problem_key', None),
        }

        XLSX_TEMPLATE_COLUMNS = {
            'structure_sets': [
                'set_key',
                *SET_STRUCTURE[1].keys()
            ],
            'structure_variables': [
                'table_key',
                *DATA_TABLE_STRUCTURE[1].keys(),
                'value',
                'blank_fill',
                'set_keys ...'
            ],
            'problem': [
                'problem_key',
                *PROBLEM_STRUCTURE[1].keys()
            ],
        }

        ALLOWED_BOOL = {
            'true': True, 'True': True, 'TRUE': True,
            'false': False, 'False': False, 'FALSE': False,
        }

    class SymbolicDefinitions:
        """Allowed constants and operators for symbolic problem definitions.

        - TOKEN_PATTERNS: 
            Dictionary of regex patterns for token types (text, 
            numbers, operators, parentheses). Determines how the symbolic
            expressions are parsed and validated.
        - NONE_SYMBOLS: 
            List of symbols considered as None or empty.
        - STD_TEXT_DATA_FILL: 
            Standard text used to fill blank text fields in SQLite sets or data tables.
        - DIMENSIONS: 
            Dict of allowed dimensions of variables, where coordinates can be defined.
        - VARIABLE_TYPES: 
            Dict of allowed variable types for data tables.
        - ALLOWED_CONSTANTS: 
            Dictionary of user-defined constants. These are defined within the 
            dictionary values (complex constants are defined in util_constants module), 
            are used to define variables (see 'value' field in data tables). 
            Constants are be built with exogenous data only. 
        - ALLOWED_OPERATORS: 
            Dictionary of user-defined operators. Keys of operators can be directly 
            used in symbolic expressions. These are defined within the dictionary 
            values (complex operators are defined in util_operators module). 
            Operators are built with exogenous data only. 

        """

        TOKEN_PATTERNS = {
            'text': r"\b[a-zA-Z_][a-zA-Z0-9_]*\b",
            'numbers': r"\b(?:\d+\.\d*|\.\d+|\d+)(?:[eE][+-]?\d+)?\b",
            'operators': [
                r"==", r">=", r"<=",
                r"\+", r"-", r"\*", r"/", r"@", f",",
                r"&",
            ],
            'parentheses': [r"\(", r"\)"],
        }

        NONE_VARIANTS = [
            pd.NA, np.nan, None, float('nan'),
            'nan', 'NaN', 'NA', 'na', 'N/A', '<NA>', '<N/A>', 'None', 'null', '',
        ]
        STD_TEXT_DATA_FILL = ''

        DIMENSIONS = {
            'ROWS': 'rows',
            'COLS': 'cols',
            'INTRA': 'intra',
            'INTER': 'inter',
        }

        VARIABLE_TYPES = {
            'CONSTANT': 'constant',
            'EXOGENOUS': 'exogenous',
            'ENDOGENOUS': 'endogenous',
        }

        VARIABLE_DOMAINS = {
            'INTEGER': 'integer',
            'BOOLEAN': 'boolean',
        }

        ALLOWED_CONSTANTS = util_constants.CONSTANTS
        ALLOWED_OPERATORS = util_operators.OPERATORS

    class NumericalSettings:
        """Settings for numerical solvers and tolerances.

        - STD_VALUES_TYPE: 
            Type of standard numerical values.
        - ALLOWED_VALUES_TYPES: 
            Tuple of allowed numerical types for values. Useful to recognize and 
            homogenize numerical values provided by user in input data files.
        - ALLOWED_TEXT_TYPE: 
            Type of allowed text values.
        - ALLOWED_SOLVERS:
            List of allowed solvers installed in the current CVXPY version.
        - TOLERANCE_TESTS_RESULTS_CHECK: 
            Tolerance for checking results of tests. It is a relative difference 
            (0.02 means 2% of maximum allowed difference between the resulting 
            database and the test databases).
        - ROUNDING_DIGITS_RELATIVE_DIFFERENCE_DB: 
            Number of digits to round the relative difference between the values 
            of the database and the test database. It is used to avoid numerical 
            issues in case of very small differences between the values of the 
            database and the test database.
        - SPARSE_MATRIX_ZEROS_THRESHOLD: 
            Threshold for considering a matrix as sparse. (0.3 means 30% of zeros 
            in the matrix). If the percentage of zeros in the matrix is higher 
            than this threshold, the matrix is considered as sparse and stored 
            in a sparse format to save memory.
        - SQL_BATCH_SIZE: 
            Batch size for SQL operations. It is used to limit the number of rows 
            processed in a single SQL operation to avoid memory issues and speed 
            up database operations.
        - SQL_MAX_BATCH_MEMORY_MB:
            Maximum memory (in MB) allocated for SQL batch operations. It is used
            to dynamically adjust the batch size based on the available memory.
        - SQL_BATCH_SIZE_MIN:
            Minimum batch size for SQL operations.
        - SQL_BATCH_SIZE_MAX:
            Maximum batch size for SQL operations.
        - CVXPY_DEFAULT_SETTINGS:
            Default settings for CVXPY solver. It includes the default solver, 
            canon backend, and whether to ignore DPP.
        - MODEL_COUPLING_SETTINGS:
            Settings for model coupling. It includes:
                allowed norms for convergence (max_relative, max_absolute, l1, l2, linf),
                numerical tolerance for convergence for each table (absolute value),
                numerical tolerance for convergence for all tables (RMS, absolute value), 
                maximum number of iterations.
        """

        STD_VALUES_TYPE = float
        ALLOWED_VALUES_TYPES = (
            int, float, np.dtype('float64'), np.dtype('int64'))
        ALLOWED_TEXT_TYPE = str
        ALLOWED_SOLVERS = cp.installed_solvers()
        TOLERANCE_TESTS_RESULTS_CHECK = 0.02
        ROUNDING_DIGITS_RELATIVE_DIFFERENCE_DB = 5
        SPARSE_MATRIX_ZEROS_THRESHOLD = 0.7
        SQL_BATCH_SIZE = 1000
        SQL_MAX_BATCH_MEMORY_MB = 10
        SQL_BATCH_SIZE_MIN = 100
        SQL_BATCH_SIZE_MAX = 10000
        CVXPY_DEFAULT_SETTINGS = {
            'solver': 'SCIPY',
            'canon_backend': cp.SCIPY_CANON_BACKEND,
            'ignore_dpp': True,
        }
        MODEL_COUPLING_SETTINGS = {
            'allowed_norms': ['max_relative', 'max_absolute', 'l1', 'l2', 'linf'],
            # Per-table RELATIVE tolerance
            'relative_tolerance': 0.01,
            # Per-table ABSOLUTE minimum tolerance (minimum guard value)
            'absolute_minimum_guard_tolerance': 0.001,
            'max_iterations': 20,
        }

    _SUBGROUPS = [
        LiteralTypes,
        ConfigFiles,
        Labels,
        DefaultStructures,
        SymbolicDefinitions,
        NumericalSettings,
    ]

    @classmethod
    def __getattr__(cls, name):
        """Provide direct access to default settings by searching nested groups.

        Args:
            name (str): The name of the attribute to retrieve.

        Returns:
            Any: The requested constant or attribute.

        Raises:
            AttributeError: If the attribute is not found.
        """
        for subgroup in cls._SUBGROUPS:
            if hasattr(subgroup, name):
                return getattr(subgroup, name)
        raise AttributeError(
            f"Constant '{name}' not found in {cls.__name__}.")