Skip to content

Dataclasses

If you don't want to use pydantic's BaseModel, you can instead get the same data validation on standard dataclasses (introduced in Python 3.7).

from datetime import datetime
from pydantic.dataclasses import dataclass


# Same syntax as `dataclasses.dataclass`, but constructor input is validated
# and coerced (e.g. the str '42' below becomes int 42).
@dataclass
class User:
    id: int
    name: str = 'John Doe'
    signup_ts: datetime = None  # pydantic v1 treats the `None` default as implicitly optional


user = User(id='42', signup_ts='2032-06-21T12:00')
print(user)
#> User(id=42, name='John Doe', signup_ts=datetime.datetime(2032, 6, 21, 12, 0))

(This script is complete, it should run "as is")

Note

Keep in mind that pydantic.dataclasses.dataclass is a drop-in replacement for dataclasses.dataclass with validation, not a replacement for pydantic.BaseModel (with a small difference in how initialization hooks work). There are cases where subclassing pydantic.BaseModel is the better choice.

For more information and discussion see pydantic/pydantic#710.

You can use all the standard pydantic field types, and the resulting dataclass will be identical to the one created by the standard library dataclass decorator.

The underlying model and its schema can be accessed through __pydantic_model__. Also, fields that require a default_factory can be specified by either a pydantic.Field or a dataclasses.field.

import dataclasses
from typing import List, Optional

from pydantic import Field
from pydantic.dataclasses import dataclass


# Defaults that need a factory can come from either `dataclasses.field` or
# `pydantic.Field`; `metadata` / `Field` keyword args feed the JSON schema.
@dataclass
class User:
    id: int
    name: str = 'John Doe'
    friends: List[int] = dataclasses.field(default_factory=lambda: [0])
    age: Optional[int] = dataclasses.field(
        default=None,
        metadata=dict(title='The age of the user', description='do not lie!')
    )
    height: Optional[int] = Field(None, title='The height in cm', ge=50, le=300)


user = User(id='42')
# The underlying pydantic model (and its schema) is exposed via __pydantic_model__.
print(user.__pydantic_model__.schema())
"""
{
    'title': 'User',
    'type': 'object',
    'properties': {
        'id': {'title': 'Id', 'type': 'integer'},
        'name': {
            'title': 'Name',
            'default': 'John Doe',
            'type': 'string',
        },
        'friends': {
            'title': 'Friends',
            'type': 'array',
            'items': {'type': 'integer'},
        },
        'age': {
            'title': 'The age of the user',
            'description': 'do not lie!',
            'type': 'integer',
        },
        'height': {
            'title': 'The height in cm',
            'minimum': 50,
            'maximum': 300,
            'type': 'integer',
        },
    },
    'required': ['id'],
}
"""
# Python 3.9+ variant of the same example (builtin generic `list[int]`).
import dataclasses
from typing import Optional

from pydantic import Field
from pydantic.dataclasses import dataclass


@dataclass
class User:
    id: int
    name: str = 'John Doe'
    friends: list[int] = dataclasses.field(default_factory=lambda: [0])
    age: Optional[int] = dataclasses.field(
        default=None,
        metadata=dict(title='The age of the user', description='do not lie!')
    )
    height: Optional[int] = Field(None, title='The height in cm', ge=50, le=300)


user = User(id='42')
# The underlying pydantic model (and its schema) is exposed via __pydantic_model__.
print(user.__pydantic_model__.schema())
"""
{
    'title': 'User',
    'type': 'object',
    'properties': {
        'id': {'title': 'Id', 'type': 'integer'},
        'name': {
            'title': 'Name',
            'default': 'John Doe',
            'type': 'string',
        },
        'friends': {
            'title': 'Friends',
            'type': 'array',
            'items': {'type': 'integer'},
        },
        'age': {
            'title': 'The age of the user',
            'description': 'do not lie!',
            'type': 'integer',
        },
        'height': {
            'title': 'The height in cm',
            'minimum': 50,
            'maximum': 300,
            'type': 'integer',
        },
    },
    'required': ['id'],
}
"""
# Python 3.10+ variant of the same example (PEP 604 `int | None` unions).
import dataclasses

from pydantic import Field
from pydantic.dataclasses import dataclass


@dataclass
class User:
    id: int
    name: str = 'John Doe'
    friends: list[int] = dataclasses.field(default_factory=lambda: [0])
    age: int | None = dataclasses.field(
        default=None,
        metadata=dict(title='The age of the user', description='do not lie!')
    )
    height: int | None = Field(None, title='The height in cm', ge=50, le=300)


user = User(id='42')
# The underlying pydantic model (and its schema) is exposed via __pydantic_model__.
print(user.__pydantic_model__.schema())
"""
{
    'title': 'User',
    'type': 'object',
    'properties': {
        'id': {'title': 'Id', 'type': 'integer'},
        'name': {
            'title': 'Name',
            'default': 'John Doe',
            'type': 'string',
        },
        'friends': {
            'title': 'Friends',
            'type': 'array',
            'items': {'type': 'integer'},
        },
        'age': {
            'title': 'The age of the user',
            'description': 'do not lie!',
            'type': 'integer',
        },
        'height': {
            'title': 'The height in cm',
            'minimum': 50,
            'maximum': 300,
            'type': 'integer',
        },
    },
    'required': ['id'],
}
"""

(This script is complete, it should run "as is")

pydantic.dataclasses.dataclass's arguments are the same as the standard decorator, except one extra keyword argument config which has the same meaning as Config.

Warning

After v1.2, the Mypy plugin must be installed to type-check pydantic dataclasses.

For more information about combining validators with dataclasses, see dataclass validators.

Dataclass Config

If you want to modify the Config like you would with a BaseModel, you have three options:

from pydantic import ConfigDict
from pydantic.dataclasses import dataclass


# Option 1 - pass a plain dict directly
# Note: `mypy` will still raise a typo error
@dataclass(config=dict(validate_assignment=True))
class MyDataclass1:
    a: int


# Option 2 - use `ConfigDict`
# (same as before at runtime since it's a `TypedDict`, but with intellisense)
@dataclass(config=ConfigDict(validate_assignment=True))
class MyDataclass2:
    a: int


# Option 3 - use a `Config` class like for a `BaseModel`
class Config:
    validate_assignment = True


@dataclass(config=Config)
class MyDataclass3:
    a: int

(This script is complete, it should run "as is")

Warning

After v1.10, pydantic dataclasses support Config.extra but some default behaviour of stdlib dataclasses may prevail. For example, when printing a pydantic dataclass with allowed extra fields, it will still use the __str__ method of stdlib dataclass and show only the required fields. This may be improved further in the future.

Nested dataclasses

Nested dataclasses are supported both in dataclasses and normal models.

from pydantic import AnyUrl
from pydantic.dataclasses import dataclass


@dataclass
class NavbarButton:
    href: AnyUrl


# Nested pydantic dataclasses are validated recursively.
@dataclass
class Navbar:
    button: NavbarButton


# A nested field can be populated from a tuple (positional args), a dict,
# or an instance of the dataclass itself.
navbar = Navbar(button=('https://example.com',))
print(navbar)
#> Navbar(button=NavbarButton(href=AnyUrl('https://example.com', scheme='https',
#> host='example.com', tld='com', host_type='domain')))

(This script is complete, it should run "as is")

Dataclasses attributes can be populated by tuples, dictionaries or instances of the dataclass itself.

Stdlib dataclasses and pydantic dataclasses

Convert stdlib dataclasses into pydantic dataclasses

Stdlib dataclasses (nested or not) can be easily converted into pydantic dataclasses by just decorating them with pydantic.dataclasses.dataclass. Pydantic will enhance the given stdlib dataclass but won't alter the default behaviour (i.e. without validation). Instead, it will create a wrapper around it that triggers validation and acts like a plain proxy. The stdlib dataclass can still be accessed via the __dataclass__ attribute (see example below).

import dataclasses
from datetime import datetime
from typing import Optional

import pydantic


@dataclasses.dataclass
class Meta:
    modified_date: Optional[datetime]
    seen_count: int


@dataclasses.dataclass
class File(Meta):
    filename: str


# `ValidatedFile` will be a proxy around `File`
ValidatedFile = pydantic.dataclasses.dataclass(File)

# the original dataclass is the `__dataclass__` attribute
assert ValidatedFile.__dataclass__ is File


# Inputs are coerced: bytes -> str, ISO string -> datetime, '7' -> 7.
validated_file = ValidatedFile(
    filename=b'thefilename',
    modified_date='2020-01-01T00:00',
    seen_count='7',
)
print(validated_file)
#> File(modified_date=datetime.datetime(2020, 1, 1, 0, 0), seen_count=7,
#> filename='thefilename')

# Invalid input raises through the proxy.
try:
    ValidatedFile(
        filename=['not', 'a', 'string'],
        modified_date=None,
        seen_count=3,
    )
except pydantic.ValidationError as e:
    print(e)
    """
    1 validation error for File
    filename
      str type expected (type=type_error.str)
    """

# `File` is not altered and still does no validation by default
print(File(
#> File(modified_date=None, seen_count=3, filename=['not', 'a', 'string'])
    filename=['not', 'a', 'string'],
    modified_date=None,
    seen_count=3,
))
# Python 3.10+ variant of the same example (PEP 604 `datetime | None`).
import dataclasses
from datetime import datetime

import pydantic


@dataclasses.dataclass
class Meta:
    modified_date: datetime | None
    seen_count: int


@dataclasses.dataclass
class File(Meta):
    filename: str


# `ValidatedFile` will be a proxy around `File`
ValidatedFile = pydantic.dataclasses.dataclass(File)

# the original dataclass is the `__dataclass__` attribute
assert ValidatedFile.__dataclass__ is File


# Inputs are coerced: bytes -> str, ISO string -> datetime, '7' -> 7.
validated_file = ValidatedFile(
    filename=b'thefilename',
    modified_date='2020-01-01T00:00',
    seen_count='7',
)
print(validated_file)
#> File(modified_date=datetime.datetime(2020, 1, 1, 0, 0), seen_count=7,
#> filename='thefilename')

# Invalid input raises through the proxy.
try:
    ValidatedFile(
        filename=['not', 'a', 'string'],
        modified_date=None,
        seen_count=3,
    )
except pydantic.ValidationError as e:
    print(e)
    """
    1 validation error for File
    filename
      str type expected (type=type_error.str)
    """

# `File` is not altered and still does no validation by default
print(File(
#> File(modified_date=None, seen_count=3, filename=['not', 'a', 'string'])
    filename=['not', 'a', 'string'],
    modified_date=None,
    seen_count=3,
))

(This script is complete, it should run "as is")

Choose when to trigger validation

As soon as your stdlib dataclass has been decorated with pydantic dataclass decorator, magic methods have been added to validate input data. If you want, you can still keep using your dataclass and choose when to trigger it.

import dataclasses

from pydantic import ValidationError
from pydantic.dataclasses import dataclass as pydantic_dataclass, set_validation


@dataclasses.dataclass
class User:
    id: int
    name: str


# Enhance stdlib dataclass (adds the validation machinery in place)
pydantic_dataclass(User)


# Construction still performs no validation by default.
user1 = User(id='whatever', name='I want')

# validate data of `user1` on demand
try:
    user1.__pydantic_validate_values__()
except ValidationError as e:
    print(e)
    """
    1 validation error for User
    id
      value is not a valid integer (type=type_error.integer)
    """

# Enforce validation at construction time inside the context manager
try:
    with set_validation(User, True):
        User(id='whatever', name='I want')
except ValidationError as e:
    print(e)
    """
    1 validation error for User
    id
      value is not a valid integer (type=type_error.integer)
    """

(This script is complete, it should run "as is")

Inherit from stdlib dataclasses

Stdlib dataclasses (nested or not) can also be inherited and pydantic will automatically validate all the inherited fields.

import dataclasses

import pydantic


@dataclasses.dataclass
class Z:
    z: int


@dataclasses.dataclass
class Y(Z):
    y: int = 0


# Only `X` is a pydantic dataclass, yet the fields inherited from the
# stdlib dataclasses `Y` and `Z` are validated too.
@pydantic.dataclasses.dataclass
class X(Y):
    x: int = 0


foo = X(x=b'1', y='2', z='3')
print(foo)
#> X(z=3, y=2, x=1)

try:
    X(z='pika')
except pydantic.ValidationError as e:
    print(e)
    """
    1 validation error for X
    z
      value is not a valid integer (type=type_error.integer)
    """

(This script is complete, it should run "as is")

Use of stdlib dataclasses with BaseModel

Bear in mind that stdlib dataclasses (nested or not) are automatically converted into pydantic dataclasses when mixed with BaseModel! Furthermore the generated pydantic dataclass will have the exact same configuration (order, frozen, ...) as the original one.

import dataclasses
from datetime import datetime
from typing import Optional

from pydantic import BaseModel, ValidationError


@dataclasses.dataclass(frozen=True)
class User:
    name: str


@dataclasses.dataclass
class File:
    filename: str
    last_modification_time: Optional[datetime] = None


# Using stdlib dataclasses as BaseModel field types converts them into
# pydantic dataclasses, keeping their original config (frozen, order, ...).
class Foo(BaseModel):
    file: File
    user: Optional[User] = None


file = File(
    filename=['not', 'a', 'string'],
    last_modification_time='2020-01-01T00:00',
)  # nothing is validated as expected
print(file)
#> File(filename=['not', 'a', 'string'],
#> last_modification_time='2020-01-01T00:00')

# Validation happens when the dataclass is used through the model.
try:
    Foo(file=file)
except ValidationError as e:
    print(e)
    """
    1 validation error for Foo
    file -> filename
      str type expected (type=type_error.str)
    """

foo = Foo(file=File(filename='myfile'), user=User(name='pika'))
try:
    foo.user.name = 'bulbi'
except dataclasses.FrozenInstanceError as e:
    print(e)
    #> cannot assign to field 'name'
# Python 3.10+ variant of the same example (PEP 604 `X | None` unions).
import dataclasses
from datetime import datetime

from pydantic import BaseModel, ValidationError


@dataclasses.dataclass(frozen=True)
class User:
    name: str


@dataclasses.dataclass
class File:
    filename: str
    last_modification_time: datetime | None = None


# Using stdlib dataclasses as BaseModel field types converts them into
# pydantic dataclasses, keeping their original config (frozen, order, ...).
class Foo(BaseModel):
    file: File
    user: User | None = None


file = File(
    filename=['not', 'a', 'string'],
    last_modification_time='2020-01-01T00:00',
)  # nothing is validated as expected
print(file)
#> File(filename=['not', 'a', 'string'],
#> last_modification_time='2020-01-01T00:00')

# Validation happens when the dataclass is used through the model.
try:
    Foo(file=file)
except ValidationError as e:
    print(e)
    """
    1 validation error for Foo
    file -> filename
      str type expected (type=type_error.str)
    """

foo = Foo(file=File(filename='myfile'), user=User(name='pika'))
try:
    foo.user.name = 'bulbi'
except dataclasses.FrozenInstanceError as e:
    print(e)
    #> cannot assign to field 'name'

(This script is complete, it should run "as is")

Use custom types

Since stdlib dataclasses are automatically converted to add validation, using custom types may cause some unexpected behaviour. In this case you can simply set arbitrary_types_allowed in the config!

import dataclasses

import pydantic


# A plain class with no pydantic validator available for it.
class ArbitraryType:
    def __init__(self, value):
        self.value = value

    def __repr__(self):
        return f'ArbitraryType(value={self.value!r})'


@dataclasses.dataclass
class DC:
    a: ArbitraryType
    b: str


# valid as it is a builtin dataclass without validation
my_dc = DC(a=ArbitraryType(value=3), b='qwe')

try:
    class Model(pydantic.BaseModel):
        dc: DC
        other: str

    Model(dc=my_dc, other='other')
except RuntimeError as e:  # invalid as it is now a pydantic dataclass
    print(e)
    """
    no validator found for <class
    'dataclasses_arbitrary_types_allowed.ArbitraryType'>, see
    `arbitrary_types_allowed` in Config
    """


# Allowing arbitrary types in the config makes the same model work.
class Model(pydantic.BaseModel):
    dc: DC
    other: str

    class Config:
        arbitrary_types_allowed = True


m = Model(dc=my_dc, other='other')
print(repr(m))
#> Model(dc=DC(a=ArbitraryType(value=3), b='qwe'), other='other')

(This script is complete, it should run "as is")

Initialize hooks

When you initialize a dataclass, it is possible to execute code after validation with the help of __post_init_post_parse__. This is not the same as __post_init__, which executes code before validation.

Tip

If you use a stdlib dataclass, you may only have __post_init__ available and wish the validation to be done before. In this case you can set Config.post_init_call = 'after_validation'

from pydantic.dataclasses import dataclass


@dataclass
class Birth:
    year: int
    month: int
    day: int


@dataclass
class User:
    birth: Birth

    # Runs BEFORE validation: `birth` is still the raw input dict here.
    def __post_init__(self):
        print(self.birth)
        #> {'year': 1995, 'month': 3, 'day': 2}

    # Runs AFTER validation: `birth` has been parsed into a Birth instance.
    def __post_init_post_parse__(self):
        print(self.birth)
        #> Birth(year=1995, month=3, day=2)


user = User(**{'birth': {'year': 1995, 'month': 3, 'day': 2}})

(This script is complete, it should run "as is")

Since v1.0, any fields annotated with dataclasses.InitVar are passed to both __post_init__ and __post_init_post_parse__.

from dataclasses import InitVar
from pathlib import Path
from typing import Optional

from pydantic.dataclasses import dataclass


@dataclass
class PathData:
    path: Path
    # InitVar fields are not stored; they are forwarded to both hooks below.
    base_path: InitVar[Optional[Path]]

    # Before validation: arguments are still the raw input strings.
    def __post_init__(self, base_path):
        print(f'Received path={self.path!r}, base_path={base_path!r}')
        #> Received path='world', base_path='/hello'

    # After validation: `self.path` and `base_path` are parsed Path objects.
    def __post_init_post_parse__(self, base_path):
        if base_path is not None:
            self.path = base_path / self.path


path_data = PathData('world', base_path='/hello')
# Received path='world', base_path='/hello'
assert path_data.path == Path('/hello/world')
# Python 3.10+ variant of the same example (PEP 604 `Path | None`).
from dataclasses import InitVar
from pathlib import Path

from pydantic.dataclasses import dataclass


@dataclass
class PathData:
    path: Path
    # InitVar fields are not stored; they are forwarded to both hooks below.
    base_path: InitVar[Path | None]

    # Before validation: arguments are still the raw input strings.
    def __post_init__(self, base_path):
        print(f'Received path={self.path!r}, base_path={base_path!r}')
        #> Received path='world', base_path='/hello'

    # After validation: `self.path` and `base_path` are parsed Path objects.
    def __post_init_post_parse__(self, base_path):
        if base_path is not None:
            self.path = base_path / self.path


path_data = PathData('world', base_path='/hello')
# Received path='world', base_path='/hello'
assert path_data.path == Path('/hello/world')

(This script is complete, it should run "as is")

Difference with stdlib dataclasses

Note that the dataclasses.dataclass from Python stdlib implements only the __post_init__ method since it doesn't run a validation step.

When substituting usage of dataclasses.dataclass with pydantic.dataclasses.dataclass, it is recommended to move the code executed in the __post_init__ method to the __post_init_post_parse__ method, and only leave behind part of code which needs to be executed before validation.

JSON Dumping

Pydantic dataclasses do not feature a .json() function. To dump them as JSON, you will need to make use of the pydantic_encoder as follows:

import dataclasses
import json
from typing import List

from pydantic.dataclasses import dataclass
from pydantic.json import pydantic_encoder


@dataclass
class User:
    id: int
    name: str = 'John Doe'
    friends: List[int] = dataclasses.field(default_factory=lambda: [0])


user = User(id='42')
# Pydantic dataclasses have no .json(); pass pydantic_encoder as the
# `default` fallback so json.dumps can serialize them.
print(json.dumps(user, indent=4, default=pydantic_encoder))
"""
{
    "id": 42,
    "name": "John Doe",
    "friends": [
        0
    ]
}
"""
# Python 3.9+ variant of the same example (builtin generic `list[int]`).
import dataclasses
import json

from pydantic.dataclasses import dataclass
from pydantic.json import pydantic_encoder


@dataclass
class User:
    id: int
    name: str = 'John Doe'
    friends: list[int] = dataclasses.field(default_factory=lambda: [0])


user = User(id='42')
# Pydantic dataclasses have no .json(); pass pydantic_encoder as the
# `default` fallback so json.dumps can serialize them.
print(json.dumps(user, indent=4, default=pydantic_encoder))
"""
{
    "id": 42,
    "name": "John Doe",
    "friends": [
        0
    ]
}
"""

(This script is complete, it should run "as is")