Configuration File
The configuration file drives a lot of the reporting and anomaly detection in Qualipy. The default
config.json file is created upon running qualipy generate-config
through the CLI.
- Some notes on the configuration:
how to reference metrics…
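The following JSON shows all available keys one could set. The lines starting with # are explanatory comments only; standard JSON does not allow comments, so leave them out of your actual config.json.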
{
# The SQLAlchemy-style connection string that tells Qualipy where to store all data.
# By default this is set to a sqlite file within the config directory
"QUALIPY_DB": "sqlite:////tmp/.qualipy/qualipy.db",
# If using a database like postgres that supports schemas, setting this
# would place all data in that specific schema
"SCHEMA": "",
# The name of the project we're configuring. This corresponds to your Qualipy
# pipeline.
"example_project": {
# This is where you specify anomaly specific settings
"ANOMALY_ARGS": {
# Each anomaly score corresponds to a standardized value. In general,
# anything over 1 is considered an anomaly, but this could be used to
# control the severity of the outliers
"importance_level": 1.3,
# This is used to set "rules" for any specific column. See what rules
# are available to use here (TODO: add link to the list of available rules)
"specific": {
# to reference an aggregate, use run_name + column_name + metric_name + arguments (if any)
"rows_my_column_count_": {
# "increasing" is just an example of a function that checks whether
# or not the aggregate is always increasing. This might be useful
# when you're inspecting the total size of a database
"increasing": {
# Can be turned on and off
"use": true,
# Since this is not a machine learning based approach, you have
# to set your own severity level when using custom rules
"severity": 3
}
}
}
},
# What anomaly model to use. See the Anomaly Detection guide for different
# options
"ANOMALY_MODEL": "prophet",
# Date format to use on reports
"DATE_FORMAT": "%Y-%m-%d",
# Minimum severity level to set for filtering out numerical
# anomalies on the anomaly report
"NUM_SEVERITY_LEVEL": 1,
# Minimum severity level to set for filtering out categorical
# anomalies on the anomaly report
"CAT_SEVERITY_LEVEL": 1,
# Useful for categorizing anomalies based on certain thresholds
"SEVERITY_LEVELS": {
"low": 1.5,
"medium": 2.5,
"high": 10
},
# The following section controls the plots on the anomaly report
"VISUALIZATION": {
# Controls the visualizations that are displayed in the anomaly report. There
# are 5 different categories of data to be displayed. Each one of them has their
# own section
# Since Qualipy by default gathers raw row counts for each data input, this section
# will show the overall trend of data size over time
"row_counts": {
# Include this if you want to view the counts of the most recent batch.
"include_bar_of_latest": {
"use": true,
"diff": true,
"show_by_default": true
},
# Include this if you want to get a summary overview of the row counts
"include_summary": {
"use": true,
"show_by_default": true
}
},
# This section is for viewing all metrics that return a numerical data type,
# such as float and int
"trend": {
"include_bar_of_latest": {
"use": true,
# You can use this to only include certain metrics
"variables": [
"measurement_concept_id_measurement_number_of_unique_",
"drug_concept_id_drug_number_of_unique_"
],
"diff": false,
"show_by_default": true
},
"include_summary": {
"use": true,
"show_by_default": true
},
# Specify an sst to add a layer to the plot that includes
# change point detection. The value refers to how far to look back
"sst": 3,
# Set this to true if each batch should have a point. Note, this
# can look unappealing with a large number of batches
"point": true,
# Set this to include a rolling mean for each trend
"n_steps": 10,
# Set this if you want to include a layer in the plot that shows
# the difference from a previous value
"add_diff": {
# Set this to determine how far to look back
"shift": 1
}
},
# Add this to visualize all categorical variables (those returning dicts
# with counts).
"proportion": {
},
# This section includes analysis on the missingness of the data
"missing": {
# By default, it will only show data that contains any actual missing data.
# To also show data without any missingness, set this to true
"include_0": true,
"include_bar_of_latest": {
"use": true,
"diff": false
}
}
},
# This section is for customizing the metric names and hover-over descriptions,
# in order to potentially make them more human-readable
"DISPLAY_NAMES": {
# This default list is automatically populated by the function name
# and description from the function definition
"DEFAULT": {
"number_of_unique": {
"display_name": "number_of_unique_values",
"description": "A total count of the number of unique values in the batch"
}
},
"CUSTOM": {
"random_function": {
"display_name": "Random Function",
"description": "Description of random_function"
}
}
}
}
}