.. highlight:: sh =================== Configuration File =================== The configuration file drives a lot of the reporting and anomaly detection in Qualipy. The default config.json file is created upon running ``qualipy generate-config`` through the CLI. Some notes on the configuration: * how to reference metrics... The following JSON has all available keys one could set. :: { # The SQLAlchemy-like string that tells Qualipy where to store all data. # By default this is set to a sqlite file within the config directory "QUALIPY_DB": "sqlite:////tmp/.qualipy/qualipy.db", # If using a database like postgres that supports schemas, setting this # would place all data in that specific schema "SCHEMA": "", # The name of the project we're configuring. This corresponds to your Qualipy # pipeline. "example_project": { # This is where you specify anomaly specific settings "ANOMALY_ARGS": { # Each anomaly score corresponds to a standardized value. In general, # anything over 1 is considered an anomaly, but this could be used to # control the severity of the outliers "importance_level": 1.3, # This is used to set "rules" for any specific column. See what rules # are available to use **here (set this) "specific": { # To reference an aggregate, use run_name + column_name + metric_name + arguments (if any) "rows_my_column_count_": { # "increasing" is just an example of a function that checks whether # or not the aggregate is always increasing. This might be useful # when you're inspecting the total size of a database "increasing": { # Can be turned on and off "use": true, # Since this is not a machine learning based approach, you have # to set your own severity level when using custom rules "severity": 3 } } } }, # What anomaly model to use. 
See the Anomaly Detection guide for different # options "ANOMALY_MODEL": "prophet", # Date format to use on reports "DATE_FORMAT": "%Y-%m-%d", # Minimum severity level to set for filtering out numerical # anomalies on the anomaly report "NUM_SEVERITY_LEVEL": 1, # Minimum severity level to set for filtering out categorical # anomalies on the anomaly report "CAT_SEVERITY_LEVEL": 1, # Useful for categorizing anomalies based on certain thresholds "SEVERITY_LEVELS": { "low": 1.5, "medium": 2.5, "high": 10 }, # The following section controls the plots on the anomaly report "VISUALIZATION": { # Controls the visualizations that are displayed in the anomaly report. There # are 5 different categories of data to be displayed. Each one of them has its # own section # Since Qualipy by default gathers raw row counts for each data input, this section # will show the overall trend of data size over time "row_counts": { # Include this if you want to view the counts of the most recent batch. "include_bar_of_latest": { "use": true, "diff": true, "show_by_default": true }, # Include this if you want to get a summary overview of the row counts "include_summary": { "use": true, "show_by_default": true } }, # This section is for viewing all metrics that return a numerical data type, # such as float and int "trend": { "include_bar_of_latest": { "use": true, # You can use this to only include certain metrics "variables": [ "measurement_concept_id_measurement_number_of_unique_", "drug_concept_id_drug_number_of_unique_" ], "diff": false, "show_by_default": true }, "include_summary": { "use": true, "show_by_default": true }, # Specify an sst to add a layer to the plot that includes # change point detection. The value refers to how far to look back "sst": 3, # Set this to true if each batch should have a point. 
Note, this # can look unappealing with a large number of batches "point": true, # Set this to include a rolling mean for each trend "n_steps": 10, # Set this if you want to include a layer in the plot that shows # the difference from a previous value "add_diff": { # Set this to determine how far to look back "shift": 1 } }, # Add this to visualize all categorical variables (those returning dicts # with counts). "proportion": { }, # This section includes analysis on the missingness of the data "missing": { # By default, it will only show data that contains any actual missing data. # To also show data without any missingness, set this to true "include_0": true, "include_bar_of_latest": { "use": true, "diff": false } } }, # This section is for customizing the metric names and hover-over descriptions, # in order to potentially make them more human-readable "DISPLAY_NAMES": { # This default list is automatically populated by the function name # and description from the function definition "DEFAULT": { "number_of_unique": { "display_name": "number_of_unique_values", "description": "A total count of the number of unique values in the batch" } }, "CUSTOM": { "random_function": { "display_name": "Random Function", "description": "Description of random_function" } } } } }