Reputation: 1093
Is there a straightforward approach to generate a Pydantic model from a dictionary?
Here is a sample of the data I have.
{
'id': '424c015f-7170-4ac5-8f59-096b83fe5f5806082020',
'contacts': [{
'displayName': 'Norma Fisher',
'id': '544aa395-0e63-4f9a-8cd4-767b3040146d'
}],
'startTime': '2020-06-08T09:38:00+00:00'
}
Expecting a model similar to ...
# Model the asker expects to obtain from the sample dictionary above.
class NewModel(BaseModel):
    id: str
    contacts: list
    startTime: str
Upvotes: 78
Views: 187404
Reputation: 2908
In Pydantic 2, you can use MyModel.model_validate(my_dict)
to generate a model from a dictionary. According to the documentation –
this is very similar to the
__init__
method of the model, except it takes a dict rather than keyword arguments.
If you're using Pydantic 1, the method is parse_obj
instead.
Upvotes: 124
Reputation: 83
If you're able to specify the fields, you can generate a model from the input data:
from pydantic import BaseModel, create_model
from typing import List
# Sample payload copied from the question.
input_data = {
    'id': '424c015f-7170-4ac5-8f59-096b83fe5f5806082020',
    'contacts': [{
        'displayName': 'Norma Fisher',
        'id': '544aa395-0e63-4f9a-8cd4-767b3040146d'
    }],
    'startTime': '2020-06-08T09:38:00+00:00'
}


# Shape of each entry in the 'contacts' list.
class Contact(BaseModel):
    displayName: str
    id: str


# Each keyword argument declares one field as a (type, default) pair.
example_model = create_model(
    'ExampleModel',
    id=(str, ''),
    contacts=(List[Contact], []),
    startTime=(str, ''),
)
# Validate the raw dictionary and build a model instance from it.
obj = example_model.model_validate(input_data)
You can also use datamodel-code-generator to generate the pydantic model:
poetry add datamodel-code-generator
poetry run datamodel-codegen \
--target-python-version 3.12 \
--input ./input.txt \
--input-file-type dict \
--output ./output.py
When I ran this with the input in the question, this was the output of datamodel-codegen
in the output.py
file:
# generated by datamodel-codegen:
# filename: input.txt
# timestamp: 2024-06-28T14:52:06+00:00
from __future__ import annotations
from typing import List
from pydantic import BaseModel
class Contact(BaseModel):
displayName: str
id: str
class Model(BaseModel):
id: str
contacts: List[Contact]
startTime: str
Upvotes: 0
Reputation: 91
I use this method to generate models at run time using a dictionary definition. This approach allows you to define nested models too. The field type syntax borrows from the create_model method.
from pydantic import create_model
m = {
"a":(int,...),
"b":{
"c":(str,"hi"),
"d":{
"e":(bool,True),
"f":(float,0.5)
}
}
}
def dict_model(name: str, dict_def: dict):
    """Create a (possibly nested) pydantic model from a dictionary definition.

    Args:
        name: Name given to the generated model. Nested models are named
            ``<name>_<field_name>``.
        dict_def: Mapping of field name to either a tuple, which is passed to
            ``create_model`` unchanged, or a nested dictionary that is turned
            into a sub-model and declared with ``...`` as its default.

    Raises:
        ValueError: If a value is neither a tuple nor a dictionary.

    Returns:
        The model class produced by ``create_model``.
    """
    model_fields = {}
    for key, spec in dict_def.items():
        if isinstance(spec, tuple):
            model_fields[key] = spec
            continue
        if not isinstance(spec, dict):
            raise ValueError(f"Field {key}:{spec} has invalid syntax")
        # Nested definition: build the sub-model first, then reference it.
        nested_model = dict_model(f'{name}_{key}', spec)
        model_fields[key] = (nested_model, ...)
    return create_model(name, **model_fields)
model = dict_model("some_name",m)
Upvotes: 9
Reputation: 952
You can also use its __init__
method:
your_model = YourModel(**your_dict)
Upvotes: 68
Reputation: 1290
For Pydantic version 2, use the model_validate
class method inherited from BaseModel
. The parse_obj
method is deprecated in pydantic version 2. The migration guide has more information about deprecated methods.
model_instance = NewModel.model_validate(data)
Upvotes: 5
Reputation: 808
Here is customized code for generating data models from Python dicts.
Code mostly borrowed from @data_wiz
from pydantic import create_model
# https://stackoverflow.com/questions/62267544/generate-pydantic-model-from-a-dict
from copy import deepcopy
def get_default_values(input_schema_copy):
    """Collapse a schema dictionary into its default values, in place.

    The schema is walked recursively: nested dictionaries are processed the
    same way, and every other entry is replaced by its second element (the
    default of a ``(type, default)`` pair).

    Args:
        input_schema_copy (dict): Schema dictionary to rewrite. It is modified
            in place, so pass a deep copy if the original must be kept.

    Returns:
        dict: The same dictionary object, now holding only default values.
    """
    for key in input_schema_copy:
        entry = input_schema_copy[key]
        if isinstance(entry, dict):
            replacement = get_default_values(entry)
        else:
            replacement = entry[1]
        # Only existing keys are reassigned, so iterating while writing is safe.
        input_schema_copy[key] = replacement
    return input_schema_copy
def get_defaults(input_schema):
    """Return the default values of a schema without modifying the schema.

    ``get_default_values`` rewrites its argument in place, so it is handed a
    deep copy here and the caller's schema stays untouched.

    Args:
        input_schema (dict): The structured schema dictionary.

    Returns:
        dict: The default values of the input schema.
    """
    return get_default_values(deepcopy(input_schema))
def are_any_defaults_empty(default_values):
    """Check whether any default value is empty (Ellipsis, i.e. ``...``).

    Nested dictionaries are searched recursively, so an Ellipsis at any depth
    is reported.

    Args:
        default_values (dict): The default values of the input schema.

    Returns:
        bool: True if any default value, at any nesting level, is Ellipsis;
        False otherwise (including for an empty dictionary).
    """
    for value in default_values.values():
        if isinstance(value, dict):
            # Propagate a hit from the nested level; the result of the
            # recursive call must not be discarded.
            if are_any_defaults_empty(value):
                return True
        elif value is Ellipsis:
            return True
    return False
def correct_schema_structure(input_schema_copy):
    """Normalize shorthand schema entries into ``(type, default)`` tuples.

    The dictionary is rewritten in place and also returned. Each value is
    handled as follows:

    * a nested dictionary is normalized recursively;
    * a class (for example ``str`` or ``int``) becomes ``(cls, ...)``, i.e. a
      field of that type with no default;
    * a string or any non-iterable value becomes ``(type(value), value)``,
      i.e. the value is used as the default;
    * anything else (tuples, lists, ...) is left untouched.

    Args:
        input_schema_copy (dict): Schema dictionary to normalize in place.

    Returns:
        dict: The same dictionary object after normalization.
    """
    for key, value in input_schema_copy.items():
        if isinstance(value, dict):
            input_schema_copy[key] = correct_schema_structure(value)
        elif isinstance(value, type):
            # isinstance also recognizes classes built with a custom
            # metaclass, which a `type(value) == type` comparison misses.
            input_schema_copy[key] = (value, ...)
        elif not hasattr(value, '__iter__') or isinstance(value, str):
            input_schema_copy[key] = (type(value), value)
    return input_schema_copy
def dict_model(dict_def: dict, name: str = "Demo_Pydantic_Nested_Model"):
    """Build a pydantic model from a dictionary schema definition.

    Tuple values are passed to ``create_model`` unchanged. Dictionary values
    become nested models named ``<name>_<field_name>``; their default is the
    dictionary of nested defaults, or Ellipsis when
    ``are_any_defaults_empty`` reports an empty default among them.

    Args:
        dict_def (dict): The schema definition as a dictionary.
        name (str): The name to give to the generated pydantic model.

    Raises:
        ValueError: When a schema value is neither a tuple nor a dictionary.

    Returns:
        The model class produced by ``create_model``.
    """
    model_fields = {}
    for key, spec in dict_def.items():
        if isinstance(spec, tuple):
            model_fields[key] = spec
            continue
        if not isinstance(spec, dict):
            raise ValueError(f"Field {key}:{spec} has invalid syntax")
        # Work out the default for the nested structure before building it.
        nested_default = get_defaults(spec)
        if are_any_defaults_empty(nested_default):
            nested_default = Ellipsis
        nested_model = dict_model(spec, f'{name}_{key}')
        model_fields[key] = (nested_model, nested_default)
    print(model_fields)  # helpful for debugging
    return create_model(name, **model_fields)
input_schema = {
"a":(int,...),
"b":{
"c":(str,"hi"),
"d":{
"e":(bool,True),
"f":(float,0.5)
},
},
"g":"hello",
"h" : 123,
"i" : str,
"k" : int
}
input_schema_corrected = correct_schema_structure(input_schema)
input_schema_corrected
Output :
{'a': (int, Ellipsis),
'b': {'c': (str, 'hi'), 'd': {'e': (bool, True), 'f': (float, 0.5)}},
'g': (str, 'hello'),
'h': (int, 123),
'i': (str, Ellipsis),
'k': (int, Ellipsis)}
model = dict_model(dict_def= input_schema, name= "Demo_Pydantic_Nested_Model")
model.schema()
{'title': 'Demo_Pydantic_Nested_Model',
'type': 'object',
'properties': {'a': {'title': 'A', 'type': 'integer'},
'b': {'title': 'B',
'default': {'c': 'hi', 'd': {'e': True, 'f': 0.5}},
'allOf': [{'$ref': '#/definitions/Demo_Pydantic_Nested_Model_b'}]},
'g': {'title': 'G', 'default': 'hello', 'type': 'string'},
'h': {'title': 'H', 'default': 123, 'type': 'integer'},
'i': {'title': 'I', 'type': 'string'},
'k': {'title': 'K', 'type': 'integer'}},
'required': ['a', 'i', 'k'],
'definitions': {'Demo_Pydantic_Nested_Model_b_d': {'title': 'Demo_Pydantic_Nested_Model_b_d',
'type': 'object',
'properties': {'e': {'title': 'E', 'default': True, 'type': 'boolean'},
'f': {'title': 'F', 'default': 0.5, 'type': 'number'}}},
'Demo_Pydantic_Nested_Model_b': {'title': 'Demo_Pydantic_Nested_Model_b',
'type': 'object',
'properties': {'c': {'title': 'C', 'default': 'hi', 'type': 'string'},
'd': {'title': 'D',
'default': {'e': True, 'f': 0.5},
'allOf': [{'$ref': '#/definitions/Demo_Pydantic_Nested_Model_b_d'}]}}}}}
test_dict = { "a" : 0, "i" : "hello", "k" : 123}
model(**test_dict).dict()
Advantages over original answer :
Upvotes: 2
Reputation: 159
If you have a sample json and want to generate a pydantic model for validation and use it, then you can try this website - https://jsontopydantic.com/ which can generate a pydantic model from a sample json
Upvotes: 5
Reputation: 58
While I like @data_wiz's dictionary definition, here is an alternative suggestion based on my own needs: taking simple JSON responses on the fly, which normally have CamelCase keys, and processing them into a Pythonic-style class.
With the standard functions, JSON converts to a dict easily. However, I wanted to work with the data in a Pythonic style, I wanted some type overrides that convert strings to Python types, and I wanted to indicate which elements are optional. This is where I started loving Pydantic.
The following code snippet can generate a model from an actual data Dict from a JSON API response, as keys are camelcase it will convert them to pythonic snake style but retain the CamelCase as Alias.
This pydantic aliasing enables easy consumption of a JSON converted to Dict without key conversion and also the direct export of JSON formatted output. NB observe the config of the dynamic model DynamicModel.__config__.allow_population_by_field_name = True
This allows the creation of a DynamicModel instance from either the alias or the Pythonic field names.
This code is not fully featured (it currently cannot handle lists), but it is working well for me for simple cases. An example of use is in the docstring of pydanticModelGenerator.
from inflection import underscore
from typing import Any, Dict, Optional
from pydantic import BaseModel, Field, create_model
class ModelDef(BaseModel):
    """Assistance Class for Pydantic Dynamic Model Generation"""

    # Name of the field on the generated model.
    field: str
    # Original source key, used as the alias of the generated field.
    field_alias: str
    # Type annotation applied to the generated field.
    field_type: Any
class pydanticModelGenerator:
    """
    Generate a pydantic model from one example dictionary.

    Takes source_data: Dict (a single example instance of something like a
    JSON node) and generates a pythonic data model whose fields carry an alias
    to the original source field names. This makes it easy to populate from or
    export to other systems while handling the data in a pythonic way.

    Being a pydantic data model, all the richness of pydantic data validation
    is available, and these models can easily be used in FastAPI and/or an ORM.

    It does not process full JSON data structures; it takes a simple JSON
    document with basic elements. Every field is typed ``str`` unless an
    override is given, and is wrapped in ``Optional`` when flagged in
    ``optionals``.

    Provide a model_name, an example of JSON data and a dict of type overrides.

    Example:

        source_data = {'Name': '48 Rainbow Rd',
            'GroupAddressStyle': 'ThreeLevel',
            'LastModified': '2020-12-21T07:02:51.2400232Z',
            'ProjectStart': '2020-12-03T07:36:03.324856Z',
            'Comment': '',
            'CompletionStatus': 'Editing',
            'LastUsedPuid': '955',
            'Guid': '0c85957b-c2ae-4985-9752-b300ab385b36'}

        source_overrides = {'Guid': {'type': uuid.UUID},
            'LastModified': {'type': datetime},
            'ProjectStart': {'type': datetime},
            }
        source_optionals = {"Comment": True}

        # create Model
        model_Project = pydanticModelGenerator(
            model_name="Project",
            source_data=source_data,
            overrides=source_overrides,
            optionals=source_optionals).generate_model()

        # create instance using DynamicModel
        project_instance = model_Project(**source_data)
    """

    def __init__(
        self,
        model_name: str = None,
        source_data: Dict[str, Any] = None,
        overrides: Optional[Dict] = None,
        optionals: Optional[Dict] = None,
    ):
        """Record the model name and derive one ModelDef per source key.

        Args:
            model_name: Name passed to ``create_model`` for the generated model.
            source_data: Example dictionary whose keys define the fields.
            overrides: Mapping of source key to ``{'type': <type>}`` for keys
                that should not be typed as ``str``. Defaults to no overrides.
            optionals: Mapping of source key to a truthy flag for keys whose
                type should be wrapped in ``Optional``. Defaults to none.
        """
        # None instead of `{}` as the default avoids sharing one dictionary
        # object between every call that omits the argument.
        overrides = {} if overrides is None else overrides
        optionals = {} if optionals is None else optionals

        def field_type_generator(key, overrides, optionals):
            # `str` unless an override entry supplies a type for this key.
            override = overrides.get(key)
            field_type = override["type"] if override else str
            return Optional[field_type] if optionals.get(key) else field_type

        self._model_name = model_name
        self._json_data = source_data
        self._model_def = [
            ModelDef(
                field=underscore(key),
                field_alias=key,
                field_type=field_type_generator(key, overrides, optionals),
            )
            for key in source_data
        ]

    def generate_model(self):
        """
        Creates a pydantic BaseModel
        from the json and overrides provided at initialization
        """
        fields = {
            d.field: (d.field_type, Field(alias=d.field_alias))
            for d in self._model_def
        }
        DynamicModel = create_model(self._model_name, **fields)
        # Allow instances to be populated by field name as well as by alias.
        DynamicModel.__config__.allow_population_by_field_name = True
        return DynamicModel
Upvotes: 1
Reputation: 13269
There's no method for exactly that, but you can use create_model()
to create a model if you know the field types.
Or there's datamodel-code-generator (separate package) which allows you to generate models from schema definitions.
Upvotes: 7