from typing import Any, Dict, Callable, Tuple, List, Union
from qualipy.util import copy_function_spec
def _get_functions(functions, column_name: str = None) -> Tuple[str, Callable]:
methods = []
if isinstance(functions, dict):
if column_name in functions:
functions = functions[column_name]
else:
functions = []
if functions:
for func in functions:
copied_function = copy_function_spec(func)
methods.append((copied_function.__name__, copied_function))
return methods
class Column:
column_name = None
column_type = None
force_type = False
overwrite_type = False
null = True
force_null = False
unique = False
is_category = False
functions = []
extra_functions = []
def _as_dict(self, name: str, read_functions: bool = True) -> Dict[str, Any]:
dict_ = {
"name": self.column_name,
"type": self.column_type,
"force_type": self.force_type,
"overwrite_type": self.overwrite_type,
"null": self.null,
"force_null": self.force_null,
"unique": self.unique,
"is_category": self.is_category,
"functions": self._get_functions(column_name=name)
if read_functions
else self.functions,
"extra_functions": self._get_functions("extra_functions", column_name=name),
}
return dict_
def _from_dict(self, args: Dict):
for key, val in args.items():
setattr(self, key, val)
def _get_functions(
self, fun_attribute: str = "functions", column_name: str = None
) -> Tuple[str, Callable]:
methods = []
given_methods = getattr(self, fun_attribute, None)
if fun_attribute == "extra_functions":
if column_name in given_methods:
given_methods = given_methods[column_name]
else:
given_methods = []
if given_methods:
for func in given_methods:
copied_function = copy_function_spec(func)
methods.append((copied_function.__name__, copied_function))
return methods
[docs]def column(
column_name: Union[str, List[str]] = None,
column_type=None,
force_type: bool = False,
overwrite_type: bool = False,
null: bool = True,
force_null: bool = False,
unique: bool = False,
is_category: bool = False,
is_date: bool = False,
split_on: str = None,
column_stage_collection_name: str = None,
functions: List[Union[Callable, Dict]] = None,
extra_functions: Dict[str, Dict] = None,
):
"""This allows us to map to a column of a data object.
This is one of the essential components of Qualipy. Using column ``allows`` us to map
to a specific column of whatever data object we are reflecting, and specify
what that column should look like - as well as apply any aggregate functions we've
defined.
Note - You must explicitly add it to the Project object in order for it to run.
Args:
column_name: The name of the column in the data object - Generally either the column name
in the pandas or SQL table.
column_type: Useful if you want to enforce types in a pandas DataFrame. See (link here) DataTypes section
for more information.
force_type: If column_type is used, should the type be enforced. Setting this to True means that
the entire process will halt if right type is not present.
overwrite_type: This is useful if the aggregate function requires a specific datatype for it to be
computed.
null: Can the column contain missing values
force_null: If null is set to False - should the process fail given there are missing values present.
unique: Should uniqueness in the column be enforced.
is_category: Denoting a column as a category has several consequences - including automatically
collecting counts for each category.
functions: A list of property defined functions.
extra_functions: If this mapping is used for multiple columns but want a function to be applied to
only one of the columns, use this. See example for more information.
Returns:
A column object that can be added to a Project. See Project for more details.
"""
if functions is None:
functions = []
if extra_functions is None:
extra_functions = {}
def return_column_dict(name: str) -> Dict[str, Any]:
dict_ = {
"name": column_name,
"type": column_type,
"force_type": force_type,
"overwrite_type": overwrite_type,
"null": null,
"force_null": force_null,
"unique": unique,
"is_category": is_category,
"is_date": is_date,
"split_on": split_on,
"column_stage_collection_name": column_stage_collection_name,
"functions": _get_functions(functions, column_name=name),
"extra_functions": _get_functions(extra_functions, column_name=name),
}
return dict_
return_column_dict.column_name = column_name
return return_column_dict