Skip to main content
Pixeltable is the open-source Python library providing declarative data infrastructure for multimodal AI applications—enabling incremental storage, transformation, indexing, retrieval, and orchestration of data.

Installation

pip install pixeltable
import pixeltable as pxt

Core Operations

# Create
pxt.create_dir('project')
pxt.create_dir('project/subdir')

# List
pxt.ls()                           # DataFrame of contents
pxt.list_dirs()                    # List of directory names
pxt.list_tables()                  # List of table names
contents = pxt.get_dir_contents('project')

# Move/rename
pxt.move('old/path', 'new/path')

# Delete
pxt.drop_dir('project', force=True)

API Reference

Full directory and table operations

Type System

Core Types

| Python | Pixeltable |
|---|---|
| str | pxt.String |
| int | pxt.Int |
| float | pxt.Float |
| bool | pxt.Bool |
| datetime | pxt.Timestamp |
| date | pxt.Date |
| dict | pxt.Json |
| bytes | pxt.Binary |
| UUID | pxt.UUID |

Media & Arrays

pxt.Image              # Images
pxt.Video              # Videos
pxt.Audio              # Audio
pxt.Document           # PDFs, Word docs

# Arrays
pxt.Array[(768,), pxt.Float]    # Fixed-dim
pxt.Array[(None,), pxt.Float]   # Variable-dim

# Non-nullable
pxt.Required[pxt.String]

Type System Guide

Complete type system documentation

Queries & Data Operations

# Insert
t.insert([{'name': 'Alice', 'age': 30}])
t.insert([User(name='Alice', age=30)])  # Pydantic

# Update
t.where(t.name == 'Alice').update({'age': 31})
t.batch_update([{'_rowid': id1, 'score': 100}])

# Delete
t.where(t.age < 18).delete()

# Recompute
t.recompute_columns(['embedding'])

Queries Guide

Learn query expressions

Join Tables

Join cookbook

Computed Columns

# Basic expression
t.add_computed_column(total=t.price * t.quantity)

# With LLM
from pixeltable.functions.openai import chat_completions

t.add_computed_column(
    summary=chat_completions(
        messages=[{'role': 'user', 'content': t.text}],
        model='gpt-4o-mini'
    ).choices[0].message.content
)

# With UDF
@pxt.udf
def word_count(text: str) -> int:
    """Count the whitespace-separated tokens in `text`."""
    tokens = text.split()
    return len(tokens)

t.add_computed_column(words=word_count(t.content))
Options:
  • stored=False — Compute on demand (not stored)
  • destination='s3://bucket/path/' — Store media in S3

Computed Columns Guide

Full computed columns documentation

Custom Functions (UDFs)

@pxt.udf
def multiply(a: int, b: int) -> int:
    """Return the product of `a` and `b`."""
    product = a * b
    return product

t.select(result=multiply(t.x, t.y)).collect()
from pixeltable.func import Batch

@pxt.udf(batch_size=32)
def embed(texts: Batch[str]) -> Batch[list[float]]:
    """Encode a batch of strings with `model.encode` and return the result via `.tolist()`."""
    # `model` is not defined in this snippet; it must already exist in scope.
    encoded = model.encode(texts)
    return encoded.tolist()
@pxt.query
def top_k_search(query: str, k: int = 5):
    """Return the `k` rows of `docs` whose content is most similar to `query`."""
    similarity = docs.content.similarity(string=query)
    ranked = docs.order_by(similarity, asc=False)
    return ranked.select(docs.content, score=similarity).limit(k)

# Use in computed column
t.add_computed_column(context=top_k_search(t.question))
# Convert table to callable function
lookup = pxt.retrieval_udf(
    kb_table,
    name='search_kb',
    description='Search knowledge base',
    parameters=['topic'],
    limit=5
)

results = lookup(topic='python')
tools = pxt.tools(lookup)  # Use as LLM tool
@pxt.uda
class std(pxt.Aggregator):
    """Aggregate that returns the population standard deviation of its non-null inputs.

    The result is derived from three running totals: the sum of the values,
    the sum of their squares, and the number of values seen.
    """

    def __init__(self):
        # Running totals over the non-null inputs seen so far.
        self.sum = 0.0
        self.sum_sq = 0.0
        self.count = 0

    def update(self, val: float | None):
        """Fold one value into the running totals; `None` is ignored."""
        if val is None:
            return
        self.sum += val
        self.sum_sq += val * val
        self.count += 1

    def value(self) -> float | None:
        """Return the standard deviation, or `None` if no values were seen."""
        if self.count == 0:
            return None
        mean = self.sum / self.count
        variance = (self.sum_sq / self.count) - (mean * mean)
        # Floating-point rounding can push this difference slightly below
        # zero (e.g. for near-constant inputs). A negative float raised to
        # 0.5 evaluates to a complex number, so clamp at zero first.
        return max(variance, 0.0) ** 0.5
# Connect to MCP server
udfs = pxt.mcp_udfs('http://localhost:8000/mcp')
t.add_computed_column(result=udfs[0](t.data))

UDFs Guide

Complete UDF documentation

1

Create Index

from pixeltable.functions.openai import embeddings

t.add_embedding_index(
    'idx',
    column=t.content,
    embedding=embeddings(input=t.content, model='text-embedding-3-small'),
    metric='cosine'  # 'cosine', 'ip', 'l2'
)
2

Search with .similarity()

sim = t.content.similarity(string='query')
results = t.order_by(sim, asc=False).select(t.content, score=sim).limit(10)
3

Or use named index

results = t.idx.similarity_search('query', k=10)
# Get raw embeddings
t.select(t.content, emb=t.content.embedding()).collect()

# Drop index
t.drop_embedding_index('idx')

Embedding Indexes

Guide

Semantic Search

Cookbook

Image Search

Cookbook

Iterators

from pixeltable.functions.video import frame_iterator
from pixeltable.functions.document import document_splitter
frames = pxt.create_view(
    'project/frames',
    videos,
    iterator=frame_iterator(video=videos.video, fps=1)
)

Iterators Guide

Full documentation

Document Chunking

RAG chunking cookbook

Views & Snapshots

# Filtered view
active = pxt.create_view(
    'project/active_users',
    users.where(users.status == 'active')
)

# View with iterator
frames = pxt.create_view(
    'project/frames',
    videos,
    iterator=frame_iterator(video=videos.video, fps=2)
)

# Snapshot (read-only)
snapshot = pxt.create_snapshot('project/backup', users)

Views Guide

Full views documentation

Version Control

t.history()                        # Version history
t.history(n=10)                    # Last 10 versions
versions = t.get_versions()
t.revert()                         # Undo last change (cannot be undone!)
t.revert() cannot be undone! Use with caution.

Version Control Guide

Full documentation

Data Sharing

# Publish to cloud
pxt.publish(
    source='my/table',
    destination_uri='pxt://username/dataset',
    access='public'  # or 'private' (default)
)
t.push()  # Sync local → cloud

Data Sharing Guide

Full documentation

AI Integrations

OpenAI

from pixeltable.functions.openai import chat_completions, embeddings, invoke_tools

# Chat
t.add_computed_column(
    response=chat_completions(
        messages=[{'role': 'user', 'content': t.prompt}],
        model='gpt-4o'
    ).choices[0].message.content
)

# Embeddings
# embeddings() is used directly as the embedding expression for
# add_embedding_index elsewhere in this guide, i.e. it already evaluates to
# the vector itself; no `.data[0].embedding` unpacking is applied here.
t.add_computed_column(
    emb=embeddings(input=t.text, model='text-embedding-3-small')
)

# Vision (multimodal)
t.add_computed_column(
    desc=chat_completions(
        messages=[{
            'role': 'user',
            'content': [
                {'type': 'text', 'text': 'Describe'},
                {'type': 'image_url', 'image_url': {'url': t.image}}
            ]
        }],
        model='gpt-4o'
    ).choices[0].message.content
)

# Tool calling
t.add_computed_column(tool_results=invoke_tools(tools, t.llm_response))
Other OpenAI functions: image_generations (DALL-E), speech (TTS), transcriptions (Whisper), translations, moderations

OpenAI Guide

Provider guide

OpenAI API

API reference

Other Major Providers

from pixeltable.functions.anthropic import messages
t.add_computed_column(
    response=messages(
        messages=[{'role': 'user', 'content': t.prompt}],
        model='claude-sonnet-4-20250514'
    ).content[0].text
)

Anthropic Guide

All LLM Providers

| Provider | Module | Key Functions | Guide |
|---|---|---|---|
| Together AI | together | chat_completions, embeddings, image_generations | Guide |
| Groq | groq | chat_completions, invoke_tools | Guide |
| Fireworks | fireworks | chat_completions | Guide |
| Mistral AI | mistralai | chat_completions, fim_completions, embeddings | Guide |
| DeepSeek | deepseek | chat_completions | Guide |
| OpenRouter | openrouter | chat_completions | Guide |
| Ollama | ollama | chat, generate, embed | Guide |
| Llama.cpp | llama_cpp | create_chat_completion | Guide |
| Replicate | replicate | run | Guide |
| fal | fal | run | Guide |
| Voyage AI | voyageai | embeddings, rerank, multimodal_embed | Guide |

Hugging Face

20+ functions: embeddings, vision, generation, NLP, speech
from pixeltable.functions.huggingface import sentence_transformer, detr_for_object_detection, image_to_image

t.add_computed_column(emb=sentence_transformer(t.text, model_id='all-MiniLM-L6-v2'))
t.add_computed_column(obj=detr_for_object_detection(t.img, model_id='facebook/detr-resnet-50'))
t.add_computed_column(transformed=image_to_image(t.img, t.prompt, model_id='stable-diffusion-v1-5/stable-diffusion-v1-5'))

Hugging Face Guide

Provider guide

Hugging Face API

API reference

Local & Specialized Models

Whisper

Speech-to-text

WhisperX

Advanced transcription

YOLOX

Object detection

Reve

Audio/video editing

TwelveLabs

Video understanding

Vision

Bounding boxes

Built-in Functions

# Common methods (no import needed)
t.name.lower() / .upper() / .strip() / .replace('old', 'new') / .split(' ')
t.name.contains('text') / .startswith('A') / .endswith('z') / .len()
t.name.contains_re(r'\d+') / .findall(r'\w+')  # Regex

String API (40+ functions)

Timestamp

Date

Math

JSON


Data Import/Export

from pixeltable import io

# CSV, JSON, Parquet, Excel
t.insert(io.import_csv('data.csv'))
t.insert(io.import_json('data.json'))
t.insert(io.import_parquet('data.parquet'))
t.insert(io.import_excel('data.xlsx', sheet_name='Sheet1'))

# Pandas
import pandas as pd
t.insert(io.import_pandas(pd.read_csv('data.csv')))

# Hugging Face datasets
from datasets import load_dataset
ds = load_dataset('squad', split='train[:100]')
t.insert(io.import_huggingface_dataset(ds))

Import CSV

Import S3

Export PyTorch


Configuration

# API Keys
import os
os.environ['OPENAI_API_KEY'] = 'sk-...'
os.environ['ANTHROPIC_API_KEY'] = 'sk-ant-...'

# Logging
pxt.configure_logging(level='INFO')

# Custom database
pxt.init({'home': '/path/to/data'})
Config file: ~/.pixeltable/config.toml
[openai]
api_key = "sk-..."

Configuration Guide

Full configuration documentation

Tool Calling

# Create tools
tools = pxt.tools(my_udf1, my_udf2)

# Use with LLM
response = chat_completions(
    messages=[{'role': 'user', 'content': query}],
    model='gpt-4o',
    tools=tools,
    tool_choice=tools.choice(required=True)
)

# Execute tools
results = invoke_tools(tools, response)
Providers with invoke_tools: OpenAI, Anthropic, Gemini, Bedrock, Groq

Tool Calling Cookbook

Full tool calling guide

Common Patterns

RAG Pipeline

from pixeltable.functions.document import document_splitter
from pixeltable.functions.openai import embeddings, chat_completions

# 1. Chunk documents
chunks = pxt.create_view(
    'rag/chunks',
    docs,
    iterator=document_splitter(document=docs.doc, separators='paragraph', limit=500)
)

# 2. Index
chunks.add_embedding_index(
    'idx',
    column=chunks.text,
    embedding=embeddings(input=chunks.text, model='text-embedding-3-small')
)

# 3. Query function
@pxt.query
def retrieve(query: str, k: int = 5):
    """Return the `k` chunks whose text is most similar to `query`."""
    similarity = chunks.text.similarity(string=query)
    ranked = chunks.order_by(similarity, asc=False)
    return ranked.select(chunks.text, score=similarity).limit(k)

# 4. Generate answers
qa = pxt.create_table('rag/qa', {'question': pxt.String})
qa.add_computed_column(context=retrieve(qa.question))
qa.add_computed_column(
    answer=chat_completions(
        messages=[
            {'role': 'system', 'content': 'Answer using context: ' + qa.context},
            {'role': 'user', 'content': qa.question}
        ],
        model='gpt-4o-mini'
    ).choices[0].message.content
)

RAG Pipeline

RAG Operations

RAG Demo

# Memory store
memories = pxt.create_table('agent/memories', {'content': pxt.String})
memories.add_embedding_index('idx', column=memories.content, embedding=...)

# Retrieval
@pxt.query
def recall(context: str, k: int = 3):
    """Return the content of the `k` memories most similar to `context`."""
    similarity = memories.content.similarity(string=context)
    nearest = memories.order_by(similarity, asc=False).limit(k)
    return nearest.select(memories.content)

# Use in conversation
conversations.add_computed_column(recalled=recall(conversations.message))

Agent Memory Pattern

from pixeltable.functions.video import frame_iterator
from pixeltable.functions.yolox import yolox

frames = pxt.create_view(
    'project/frames',
    videos,
    iterator=frame_iterator(video=videos.video, fps=1)
)
frames.add_computed_column(detections=yolox(frames.frame))

Object Detection in Videos


Quick Reference

Essential Commands

| Task | Command |
|---|---|
| Create table | `pxt.create_table('dir/table', schema)` |
| Get table | `pxt.get_table('dir/table')` |
| Query | `t.where(condition).collect()` |
| Add computed | `t.add_computed_column(name=expr)` |
| Embed index | `t.add_embedding_index('idx', column, embedding)` |
| Create view | `pxt.create_view('name', base, iterator=...)` |

Decorators

| Decorator | Purpose |
|---|---|
| `@pxt.udf` | User-defined function |
| `@pxt.uda` | User-defined aggregate |
| `@pxt.query` | Reusable query function |

Additional Resources

Label Studio Integration

Data labeling

FiftyOne Integration

Dataset visualization

Pydantic Integration

Structured outputs

Deployment

Production deployment

Discord

Community

GitHub

Source code

Changelog

Updates

Full API Documentation: docs.pixeltable.com/sdk/latest
Last updated: March 2026 | Pixeltable v0.5.x
Last modified on March 15, 2026