This documentation page is also available as an interactive notebook. You can launch the notebook in
Kaggle or Colab, or download it for use with an IDE or local Jupyter installation, by clicking one of the
above links.
Pixeltable’s Pydantic integration enables type-safe data insertion using
Pydantic models. Instead of inserting raw dictionaries, you can define
structured models with validation and insert them directly into
Pixeltable tables.
Benefits
- Type Safety: Pydantic validates data before insertion
- IDE Support: Autocomplete and type hints for your data
- Self-Documenting: Models serve as schema documentation
- Validation: Built-in data validation via Pydantic
Important notes
- Pydantic model fields map to Pixeltable columns by name
- Computed columns are automatically skipped during insertion
- Nested Pydantic models map to JSON columns
%pip install -qU pixeltable pydantic
import pixeltable as pxt
# Reset the demo namespace so this page can be re-run from the top:
# drop the 'pydantic_demo' directory (force=True), then create it again.
# The order matters: the create must come after the drop.
pxt.drop_dir('pydantic_demo', force=True)
pxt.create_dir('pydantic_demo')
Basic usage: scalar types
Define a Pydantic model with fields that match your table columns.
Pixeltable automatically maps Python types to Pixeltable types:
import datetime
from typing import Literal
from enum import Enum
import pydantic
# Define an enum for product categories.
# The members carry integer values, so the matching table column is pxt.Int.
class Category(Enum):
    ELECTRONICS = 1
    CLOTHING = 2
    BOOKS = 3
# Define a Pydantic model.
# Field names match the column names of the 'pydantic_demo.products' table.
class Product(pydantic.BaseModel):
    name: str
    price: float
    in_stock: bool
    category: Category  # Enum with integer values
    rating: Literal['poor', 'average', 'good', 'excellent']
    created_at: datetime.datetime
    description: str | None = None  # Optional field
# Create a table whose columns line up, by name, with the Product model fields
products = pxt.create_table(
    'pydantic_demo.products',
    {
        'name': pxt.Required[pxt.String],
        'price': pxt.Required[pxt.Float],
        'in_stock': pxt.Required[pxt.Bool],
        'category': pxt.Required[pxt.Int],  # Enum values are integers
        'rating': pxt.Required[pxt.String],  # Literal values
        'created_at': pxt.Required[pxt.Timestamp],
        'description': pxt.String,  # Nullable
    },
)
# Build the Pydantic model instances; all rows share one creation timestamp
now = datetime.datetime.now()
product_data = [
    Product(
        name='Wireless Headphones',
        price=79.99,
        in_stock=True,
        category=Category.ELECTRONICS,
        rating='excellent',
        created_at=now,
        description='High-quality wireless headphones with noise cancellation',
    ),
    # No description given: the field falls back to its default of None
    Product(
        name='Python Cookbook',
        price=49.99,
        in_stock=True,
        category=Category.BOOKS,
        rating='good',
        created_at=now,
    ),
    Product(
        name='Running Shoes',
        price=129.99,
        in_stock=False,
        category=Category.CLOTHING,
        rating='average',
        created_at=now,
        description='Lightweight running shoes',
    ),
]

# Pass the list of models straight to insert(), then display the table
products.insert(product_data)
products.collect()
Nested models and JSON columns
Nested Pydantic models automatically map to Pixeltable JSON columns.
This is useful for storing structured metadata.
# Define nested models
# Address is the innermost model; it is embedded in ContactInfo.
class Address(pydantic.BaseModel):
    street: str
    city: str
    country: str
    zip_code: str
# Contact details; embeds an Address and is itself embedded in Customer.
class ContactInfo(pydantic.BaseModel):
    email: str
    phone: str | None = None  # Optional
    address: Address
# Top-level model; field names match the 'pydantic_demo.customers' columns.
class Customer(pydantic.BaseModel):
    customer_id: str
    name: str
    contact: ContactInfo  # Nested model → JSON column
# Create the customers table; the nested model goes into a single JSON column
customers = pxt.create_table(
    'pydantic_demo.customers',
    {
        'customer_id': pxt.Required[pxt.String],
        'name': pxt.Required[pxt.String],
        'contact': pxt.Required[pxt.Json],  # Nested model stored as JSON
    },
)
# Insert nested data
customer_data = [
    Customer(
        customer_id='C001',
        name='Alice Johnson',
        contact=ContactInfo(
            email='alice@example.com',
            phone='+1-555-0101',
            address=Address(
                street='123 Main St',
                city='San Francisco',
                country='USA',
                zip_code='94102',
            ),
        ),
    ),
    # No phone given: ContactInfo.phone falls back to its default of None
    Customer(
        customer_id='C002',
        name='Bob Smith',
        contact=ContactInfo(
            email='bob@example.com',
            address=Address(
                street='456 Oak Ave',
                city='New York',
                country='USA',
                zip_code='10001',
            ),
        ),
    ),
]
customers.insert(customer_data)
customers.collect()
# Query nested JSON fields using Pixeltable's JSON path syntax:
# attribute access on the JSON column walks into the stored structure.
customers.select(
    customers.name,
    email=customers.contact.email,
    city=customers.contact.address.city,
).collect()
For media columns (Image, Video, Audio, Document), use str or Path
fields in your Pydantic model to specify file paths or URLs.
from pathlib import Path
# Model for rows of the 'pydantic_demo.images' table.
class ImageRecord(pydantic.BaseModel):
    title: str
    image_url: str  # URLs or file paths as strings
    tags: list[str]
# Create the images table; the model's string field feeds an Image column
images = pxt.create_table(
    'pydantic_demo.images',
    {
        'title': pxt.Required[pxt.String],
        'image_url': pxt.Required[pxt.Image],  # Media column
        'tags': pxt.Required[pxt.Json],
    },
)
# Insert image records, referencing each image by URL
base_url = 'https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/images'
image_data = [
    ImageRecord(
        title='Sample Image',
        image_url=f'{base_url}/000000000036.jpg',
        tags=['sample', 'test', 'image'],
    ),
]
images.insert(image_data)

# Show the inserted columns
images.select(images.title, images.image_url, images.tags).collect()
Working with Computed Columns
Pydantic models work seamlessly with computed columns. Simply omit
computed column fields from your model - Pixeltable will skip them
during insertion.
# Model only includes input columns; the computed word_count column
# is deliberately left out.
class Article(pydantic.BaseModel):
    title: str
    content: str
# Create the base table; the computed column is added in the next step
articles = pxt.create_table(
    'pydantic_demo.articles',
    {
        'title': pxt.Required[pxt.String],
        'content': pxt.Required[pxt.String],
    },
)

# Add a computed column: the number of whitespace-separated words in `content`
articles.add_computed_column(
    word_count=articles.content.apply(
        lambda x: len(x.split()),
        col_type=pxt.Int,
    ),
)
# Insert data - only title and content are supplied; word_count is computed
article_data = [
    Article(
        title='Getting Started with Pixeltable',
        content='Pixeltable is a powerful tool for building AI applications. It provides automatic versioning and incremental computation.',
    ),
    Article(
        title='Type Safety in Python',
        content='Using Pydantic with Pixeltable provides type safety and validation for your data pipelines.',
    ),
]
articles.insert(article_data)

# Display each title next to its computed word count
articles.select(articles.title, articles.word_count).collect()
Optional Fields and Defaults
Pydantic’s optional fields with defaults work naturally with
Pixeltable’s nullable columns.
# Model with one defaulted field and two optional (nullable) fields.
class Task(pydantic.BaseModel):
    title: str
    priority: int = 1  # Default value
    due_date: datetime.datetime | None = None  # Optional
    notes: str | None = None  # Optional
# Optional model fields pair with nullable columns (declared without pxt.Required)
tasks = pxt.create_table(
    'pydantic_demo.tasks',
    {
        'title': pxt.Required[pxt.String],
        'priority': pxt.Required[pxt.Int],
        'due_date': pxt.Timestamp,  # Nullable
        'notes': pxt.String,  # Nullable
    },
)
# Insert with and without optional fields
task_data = [
    # Explicit priority and due date; notes stays None
    Task(title='Complete project', priority=3, due_date=datetime.datetime(2025, 12, 31)),
    # Uses default priority=1, None for optionals
    Task(title='Review code'),
    # Default priority, no due date, explicit notes
    Task(title='Write docs', notes='Include examples'),
]
tasks.insert(task_data)
tasks.collect()
Type Mapping Reference
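Here’s the mapping between Pydantic/Python types and Pixeltable types,
as used in the examples above:
- str → pxt.String
- int → pxt.Int
- float → pxt.Float
- bool → pxt.Bool
- datetime.datetime → pxt.Timestamp
- Enum with integer values → pxt.Int
- Literal of string values → pxt.String
- Nested Pydantic model → pxt.Json
- list[str] → pxt.Json
- T | None (optional field) → nullable column (declared without pxt.Required)
- str or Path holding a file path or URL → media column (Image, Video, Audio, Document)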
Learn More
For more information about working with Pydantic in Pixeltable, see the Pixeltable documentation.
If you have any questions, don’t hesitate to reach out on
Discord.