kelp.models.model
Reference for the core model and column configuration models.
kelp.models.model.TableType
¶
kelp.models.model.Model
pydantic-model
¶
Bases: BaseModel
Model definition in Unity Catalog.
Represents a metadata model in Kelp with configuration that maps to Databricks Unity Catalog objects.
Show JSON schema:
{
"$defs": {
"Column": {
"description": "Column definition for a model.",
"properties": {
"name": {
"description": "Column name",
"title": "Name",
"type": "string"
},
"description": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Human-readable description of the column",
"title": "Description"
},
"data_type": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "SQL data type of the column",
"title": "Data Type"
},
"nullable": {
"default": true,
"description": "Whether the column allows NULL values",
"title": "Nullable",
"type": "boolean"
},
"generated": {
"anyOf": [
{
"discriminator": {
"mapping": {
"expression": "#/$defs/GeneratedExpressionColumnConfig",
"identity": "#/$defs/GeneratedIdentityColumnConfig"
},
"propertyName": "type"
},
"oneOf": [
{
"$ref": "#/$defs/GeneratedIdentityColumnConfig"
},
{
"$ref": "#/$defs/GeneratedExpressionColumnConfig"
}
]
},
{
"type": "null"
}
],
"default": null,
"description": "Configuration for generated columns (identity or expression based)",
"title": "Generated"
},
"tags": {
"additionalProperties": {
"type": "string"
},
"description": "Metadata tags for the column",
"title": "Tags",
"type": "object"
}
},
"required": [
"name"
],
"title": "Column",
"type": "object"
},
"DQXQuality": {
"properties": {
"engine": {
"const": "dqx",
"description": "Quality engine type",
"title": "Engine",
"type": "string"
},
"level": {
"default": "row",
"description": "Level at which quality is enforced",
"enum": [
"row",
"table"
],
"title": "Level",
"type": "string"
},
"sdp_expect_level": {
"default": "warn",
"description": "Action for quality violations: warn, fail, drop, or deactivate",
"enum": [
"warn",
"fail",
"drop",
"deactivate"
],
"title": "Sdp Expect Level",
"type": "string"
},
"sdp_quarantine": {
"default": false,
"description": "Whether to quarantine rows failing quality checks",
"title": "Sdp Quarantine",
"type": "boolean"
},
"checks": {
"description": "Quality check configurations",
"items": {
"additionalProperties": true,
"type": "object"
},
"title": "Checks",
"type": "array"
}
},
"required": [
"engine"
],
"title": "DQXQuality",
"type": "object"
},
"ForeignKeyConstraint": {
"properties": {
"name": {
"description": "Constraint name",
"title": "Name",
"type": "string"
},
"type": {
"default": "foreign_key",
"description": "Constraint type identifier",
"title": "Type",
"type": "string"
},
"columns": {
"description": "List of local column names",
"items": {
"type": "string"
},
"title": "Columns",
"type": "array"
},
"reference_table": {
"description": "Fully qualified name of the referenced table",
"title": "Reference Table",
"type": "string"
},
"reference_columns": {
"description": "List of column names in the referenced table",
"items": {
"type": "string"
},
"title": "Reference Columns",
"type": "array"
}
},
"required": [
"name",
"reference_table"
],
"title": "ForeignKeyConstraint",
"type": "object"
},
"GeneratedExpressionColumnConfig": {
"properties": {
"type": {
"const": "expression",
"description": "Column type identifier",
"title": "Type",
"type": "string"
},
"expression": {
"description": "SQL expression used to generate the column value",
"title": "Expression",
"type": "string"
}
},
"required": [
"type",
"expression"
],
"title": "GeneratedExpressionColumnConfig",
"type": "object"
},
"GeneratedIdentityColumnConfig": {
"properties": {
"type": {
"const": "identity",
"description": "Column type identifier",
"title": "Type",
"type": "string"
},
"as_default": {
"default": false,
"description": "Generated as default (True) or always (False)",
"title": "As Default",
"type": "boolean"
},
"start_with": {
"default": 1,
"description": "Starting value for the identity sequence",
"title": "Start With",
"type": "integer"
},
"increment_by": {
"default": 1,
"description": "Increment step for the identity sequence",
"title": "Increment By",
"type": "integer"
}
},
"required": [
"type"
],
"title": "GeneratedIdentityColumnConfig",
"type": "object"
},
"PrimaryKeyConstraint": {
"properties": {
"name": {
"description": "Constraint name",
"title": "Name",
"type": "string"
},
"type": {
"default": "primary_key",
"description": "Constraint type identifier",
"title": "Type",
"type": "string"
},
"columns": {
"description": "List of column names forming the primary key",
"items": {
"type": "string"
},
"title": "Columns",
"type": "array"
}
},
"required": [
"name"
],
"title": "PrimaryKeyConstraint",
"type": "object"
},
"SDPQuality": {
"properties": {
"engine": {
"const": "sdp",
"description": "Quality engine type",
"title": "Engine",
"type": "string"
},
"level": {
"const": "row",
"default": "row",
"description": "Quality enforcement level",
"title": "Level",
"type": "string"
},
"expect_all": {
"additionalProperties": {
"type": "string"
},
"description": "SQL expressions that must pass",
"title": "Expect All",
"type": "object"
},
"expect_all_or_drop": {
"additionalProperties": {
"type": "string"
},
"description": "SQL expressions; failing rows are dropped",
"title": "Expect All Or Drop",
"type": "object"
},
"expect_all_or_fail": {
"additionalProperties": {
"type": "string"
},
"description": "SQL expressions; job fails if any expression fails",
"title": "Expect All Or Fail",
"type": "object"
},
"expect_all_or_quarantine": {
"additionalProperties": {
"type": "string"
},
"description": "SQL expressions; failing rows are quarantined",
"title": "Expect All Or Quarantine",
"type": "object"
}
},
"required": [
"engine"
],
"title": "SDPQuality",
"type": "object"
},
"TableType": {
"enum": [
"external",
"external_shallow_clone",
"foreign",
"managed",
"managed_shallow_clone",
"materialized_view",
"metric_view",
"streaming_table",
"view"
],
"title": "TableType",
"type": "string"
}
},
"description": "Model definition in Unity Catalog.\n\nRepresents a metadata model in Kelp with configuration that maps to\nDatabricks Unity Catalog objects.",
"properties": {
"table_type": {
"$ref": "#/$defs/TableType",
"default": "managed",
"description": "Type of table: managed, external, view, streaming_table, etc."
},
"catalog": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Unity Catalog name",
"title": "Catalog"
},
"schema": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Schema/database name",
"title": "Schema"
},
"name": {
"description": "Model name",
"title": "Name",
"type": "string"
},
"description": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Human-readable description of the model",
"title": "Description"
},
"spark_conf": {
"additionalProperties": true,
"description": "Spark configuration properties",
"title": "Spark Conf",
"type": "object"
},
"table_properties": {
"additionalProperties": true,
"description": "Databricks table properties",
"title": "Table Properties",
"type": "object"
},
"path": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Physical path for external tables or custom locations",
"title": "Path"
},
"partition_cols": {
"description": "List of column names for partitioning",
"items": {
"type": "string"
},
"title": "Partition Cols",
"type": "array"
},
"cluster_by_auto": {
"default": false,
"description": "Enable automatic clustering optimization",
"title": "Cluster By Auto",
"type": "boolean"
},
"cluster_by": {
"description": "List of column names for explicit clustering (max 4)",
"items": {
"type": "string"
},
"maxItems": 4,
"title": "Cluster By",
"type": "array"
},
"row_filter": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "SQL expression to filter rows based on security policies",
"title": "Row Filter"
},
"columns": {
"description": "Column definitions for the model",
"items": {
"$ref": "#/$defs/Column"
},
"title": "Columns",
"type": "array"
},
"quality": {
"anyOf": [
{
"discriminator": {
"mapping": {
"dqx": "#/$defs/DQXQuality",
"sdp": "#/$defs/SDPQuality"
},
"propertyName": "engine"
},
"oneOf": [
{
"$ref": "#/$defs/SDPQuality"
},
{
"$ref": "#/$defs/DQXQuality"
}
]
},
{
"type": "null"
}
],
"default": null,
"description": "Data quality configuration using SDPQuality or DQXQuality",
"title": "Quality"
},
"constraints": {
"description": "Constraints like primary key or foreign key",
"items": {
"anyOf": [
{
"$ref": "#/$defs/PrimaryKeyConstraint"
},
{
"$ref": "#/$defs/ForeignKeyConstraint"
}
]
},
"title": "Constraints",
"type": "array"
},
"tags": {
"additionalProperties": {
"type": "string"
},
"description": "Metadata tags for the model",
"title": "Tags",
"type": "object"
},
"meta": {
"additionalProperties": true,
"description": "Generic user-defined metadata for filtering and grouping",
"title": "Meta",
"type": "object"
}
},
"required": [
"name"
],
"title": "Model",
"type": "object"
}
Config:
- validate_by_name: True
- validate_by_alias: True
- serialize_by_alias: True
- use_enum_values: True
Fields:
- origin_file_path (SkipJsonSchema[str] | None)
- table_type (TableType)
- catalog (str | None)
- schema_ (str | None)
- name (str)
- description (str | None)
- spark_conf (dict)
- table_properties (dict)
- path (str | None)
- partition_cols (list[str])
- cluster_by_auto (bool)
- cluster_by (list[str])
- row_filter (str | None)
- columns (list[Column])
- quality (SDPQuality | DQXQuality | None)
- constraints (list[PrimaryKeyConstraint | ForeignKeyConstraint])
- tags (dict[str, str])
- meta (dict[str, Any])
- raw_config (SkipJsonSchema[dict])
Validators:
- _serialize_complex_property_values → table_properties
- _validate_catalog_requires_schema
origin_file_path
pydantic-field
¶
Path to the source YAML file defining this model
table_type
pydantic-field
¶
table_type = MANAGED
Type of table: managed, external, view, streaming_table, etc.
row_filter
pydantic-field
¶
SQL expression to filter rows based on security policies
raw_config
pydantic-field
¶
Original unparsed configuration preserving placeholder variables
model_config
class-attribute
instance-attribute
¶
model_config = ConfigDict(
validate_by_name=True,
validate_by_alias=True,
serialize_by_alias=True,
use_enum_values=True,
)
deserialize_property_values
staticmethod
¶
Deserialize JSON-encoded property values back to complex types.
Used by the YAML writer to restore the original structure (list, dict) so that the YAML output uses native YAML types instead of JSON strings.
Source code in src/kelp/models/model.py
get_qualified_name
¶
Get the fully qualified model name including database/schema if applicable.
Source code in src/kelp/models/model.py
kelp.models.model.Column
pydantic-model
¶
Bases: BaseModel
Column definition for a model.
Show JSON schema:
{
"$defs": {
"GeneratedExpressionColumnConfig": {
"properties": {
"type": {
"const": "expression",
"description": "Column type identifier",
"title": "Type",
"type": "string"
},
"expression": {
"description": "SQL expression used to generate the column value",
"title": "Expression",
"type": "string"
}
},
"required": [
"type",
"expression"
],
"title": "GeneratedExpressionColumnConfig",
"type": "object"
},
"GeneratedIdentityColumnConfig": {
"properties": {
"type": {
"const": "identity",
"description": "Column type identifier",
"title": "Type",
"type": "string"
},
"as_default": {
"default": false,
"description": "Generated as default (True) or always (False)",
"title": "As Default",
"type": "boolean"
},
"start_with": {
"default": 1,
"description": "Starting value for the identity sequence",
"title": "Start With",
"type": "integer"
},
"increment_by": {
"default": 1,
"description": "Increment step for the identity sequence",
"title": "Increment By",
"type": "integer"
}
},
"required": [
"type"
],
"title": "GeneratedIdentityColumnConfig",
"type": "object"
}
},
"description": "Column definition for a model.",
"properties": {
"name": {
"description": "Column name",
"title": "Name",
"type": "string"
},
"description": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Human-readable description of the column",
"title": "Description"
},
"data_type": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "SQL data type of the column",
"title": "Data Type"
},
"nullable": {
"default": true,
"description": "Whether the column allows NULL values",
"title": "Nullable",
"type": "boolean"
},
"generated": {
"anyOf": [
{
"discriminator": {
"mapping": {
"expression": "#/$defs/GeneratedExpressionColumnConfig",
"identity": "#/$defs/GeneratedIdentityColumnConfig"
},
"propertyName": "type"
},
"oneOf": [
{
"$ref": "#/$defs/GeneratedIdentityColumnConfig"
},
{
"$ref": "#/$defs/GeneratedExpressionColumnConfig"
}
]
},
{
"type": "null"
}
],
"default": null,
"description": "Configuration for generated columns (identity or expression based)",
"title": "Generated"
},
"tags": {
"additionalProperties": {
"type": "string"
},
"description": "Metadata tags for the column",
"title": "Tags",
"type": "object"
}
},
"required": [
"name"
],
"title": "Column",
"type": "object"
}
Fields:
- name (str)
- description (str | None)
- data_type (str | None)
- nullable (bool)
- generated (GeneratedIdentityColumnConfig | GeneratedExpressionColumnConfig | None)
- tags (dict[str, str])
generated
pydantic-field
¶
Configuration for generated columns (identity or expression based)
kelp.models.model.GeneratedIdentityColumnConfig
pydantic-model
¶
Bases: BaseModel
Show JSON schema:
{
"properties": {
"type": {
"const": "identity",
"description": "Column type identifier",
"title": "Type",
"type": "string"
},
"as_default": {
"default": false,
"description": "Generated as default (True) or always (False)",
"title": "As Default",
"type": "boolean"
},
"start_with": {
"default": 1,
"description": "Starting value for the identity sequence",
"title": "Start With",
"type": "integer"
},
"increment_by": {
"default": 1,
"description": "Increment step for the identity sequence",
"title": "Increment By",
"type": "integer"
}
},
"required": [
"type"
],
"title": "GeneratedIdentityColumnConfig",
"type": "object"
}
Fields:
- type (Literal['identity'])
- as_default (bool)
- start_with (int)
- increment_by (int)
kelp.models.model.GeneratedExpressionColumnConfig
pydantic-model
¶
Bases: BaseModel
Show JSON schema:
{
"properties": {
"type": {
"const": "expression",
"description": "Column type identifier",
"title": "Type",
"type": "string"
},
"expression": {
"description": "SQL expression used to generate the column value",
"title": "Expression",
"type": "string"
}
},
"required": [
"type",
"expression"
],
"title": "GeneratedExpressionColumnConfig",
"type": "object"
}
Fields:
- type (Literal['expression'])
- expression (str)
kelp.models.model.Constraint
pydantic-model
¶
kelp.models.model.PrimaryKeyConstraint
pydantic-model
¶
Bases: Constraint
Show JSON schema:
{
"properties": {
"name": {
"description": "Constraint name",
"title": "Name",
"type": "string"
},
"type": {
"default": "primary_key",
"description": "Constraint type identifier",
"title": "Type",
"type": "string"
},
"columns": {
"description": "List of column names forming the primary key",
"items": {
"type": "string"
},
"title": "Columns",
"type": "array"
}
},
"required": [
"name"
],
"title": "PrimaryKeyConstraint",
"type": "object"
}
Fields:
- name (str)
- type (str)
- columns (list[str])
kelp.models.model.ForeignKeyConstraint
pydantic-model
¶
Bases: Constraint
Show JSON schema:
{
"properties": {
"name": {
"description": "Constraint name",
"title": "Name",
"type": "string"
},
"type": {
"default": "foreign_key",
"description": "Constraint type identifier",
"title": "Type",
"type": "string"
},
"columns": {
"description": "List of local column names",
"items": {
"type": "string"
},
"title": "Columns",
"type": "array"
},
"reference_table": {
"description": "Fully qualified name of the referenced table",
"title": "Reference Table",
"type": "string"
},
"reference_columns": {
"description": "List of column names in the referenced table",
"items": {
"type": "string"
},
"title": "Reference Columns",
"type": "array"
}
},
"required": [
"name",
"reference_table"
],
"title": "ForeignKeyConstraint",
"type": "object"
}
Fields:
- name (str)
- type (str)
- columns (list[str])
- reference_table (str)
- reference_columns (list[str])
kelp.models.model.Quality
pydantic-model
¶
Bases: BaseModel
Show JSON schema:
{
"properties": {
"engine": {
"description": "Quality engine type",
"title": "Engine",
"type": "string"
},
"level": {
"default": "row",
"description": "Level at which quality is enforced",
"enum": [
"row",
"table"
],
"title": "Level",
"type": "string"
}
},
"required": [
"engine"
],
"title": "Quality",
"type": "object"
}
Fields:
- engine (str)
- level (Literal['row', 'table'])
kelp.models.model.SDPQuality
pydantic-model
¶
Bases: Quality
Show JSON schema:
{
"properties": {
"engine": {
"const": "sdp",
"description": "Quality engine type",
"title": "Engine",
"type": "string"
},
"level": {
"const": "row",
"default": "row",
"description": "Quality enforcement level",
"title": "Level",
"type": "string"
},
"expect_all": {
"additionalProperties": {
"type": "string"
},
"description": "SQL expressions that must pass",
"title": "Expect All",
"type": "object"
},
"expect_all_or_drop": {
"additionalProperties": {
"type": "string"
},
"description": "SQL expressions; failing rows are dropped",
"title": "Expect All Or Drop",
"type": "object"
},
"expect_all_or_fail": {
"additionalProperties": {
"type": "string"
},
"description": "SQL expressions; job fails if any expression fails",
"title": "Expect All Or Fail",
"type": "object"
},
"expect_all_or_quarantine": {
"additionalProperties": {
"type": "string"
},
"description": "SQL expressions; failing rows are quarantined",
"title": "Expect All Or Quarantine",
"type": "object"
}
},
"required": [
"engine"
],
"title": "SDPQuality",
"type": "object"
}
Fields:
- engine (Literal['sdp'])
- level (Literal['row'])
- expect_all (dict[str, str])
- expect_all_or_drop (dict[str, str])
- expect_all_or_fail (dict[str, str])
- expect_all_or_quarantine (dict[str, str])
kelp.models.model.DQXQuality
pydantic-model
¶
Bases: Quality
Show JSON schema:
{
"properties": {
"engine": {
"const": "dqx",
"description": "Quality engine type",
"title": "Engine",
"type": "string"
},
"level": {
"default": "row",
"description": "Level at which quality is enforced",
"enum": [
"row",
"table"
],
"title": "Level",
"type": "string"
},
"sdp_expect_level": {
"default": "warn",
"description": "Action for quality violations: warn, fail, drop, or deactivate",
"enum": [
"warn",
"fail",
"drop",
"deactivate"
],
"title": "Sdp Expect Level",
"type": "string"
},
"sdp_quarantine": {
"default": false,
"description": "Whether to quarantine rows failing quality checks",
"title": "Sdp Quarantine",
"type": "boolean"
},
"checks": {
"description": "Quality check configurations",
"items": {
"additionalProperties": true,
"type": "object"
},
"title": "Checks",
"type": "array"
}
},
"required": [
"engine"
],
"title": "DQXQuality",
"type": "object"
}
Fields:
- level (Literal['row', 'table'])
- engine (Literal['dqx'])
- sdp_expect_level (Literal['warn', 'fail', 'drop', 'deactivate'])
- sdp_quarantine (bool)
- checks (list[dict])