This documentation page is also available as an interactive notebook. You can launch the notebook in
Kaggle or Colab, or download it for use with an IDE or local Jupyter installation, by clicking one of the
above links.
Pixeltable’s Jina AI integration enables you to access state-of-the-art
embedding and reranker models via the Jina AI API.
Prerequisites
Important notes
- Jina AI usage may incur costs based on your Jina AI plan.
- Be mindful of sensitive data and consider security measures when
integrating with external services.
First you’ll need to install Pixeltable and set up your Jina AI API key.
%pip install -qU pixeltable
import os
import getpass
# Ask for the Jina AI API key only when it is not already set in the
# environment, so re-running the cell never prompts twice. getpass is used so
# the key is not echoed while it is typed.
if 'JINA_API_KEY' not in os.environ:
    os.environ['JINA_API_KEY'] = getpass.getpass(
        'Enter your Jina AI API key: '
    )
Now let’s create a Pixeltable directory to hold the tables for our demo.
import pixeltable as pxt
# Start from a clean slate: remove the 'jina_demo' directory and its contents,
# if it exists, then create it again so the rest of the demo can be re-run.
pxt.drop_dir('jina_demo', force=True)
pxt.create_dir('jina_demo')
Created directory 'jina_demo'.
<pixeltable.catalog.dir.Dir at 0x1454c53d0>
Text Embeddings
Jina AI provides frontier multilingual embedding models for semantic
search and RAG applications. The jina-embeddings-v3 model supports 89+
languages and achieves state-of-the-art performance.
from pixeltable.functions import jina
# Table that holds the raw documents; 'text' is its only stored column.
docs_t = pxt.create_table('jina_demo.documents', {'text': pxt.String})

# Describe the Jina embedding of the 'text' column first, then attach it as a
# computed column named 'embedding'.
# task='retrieval.passage' optimizes embeddings for documents to be searched.
passage_embedding = jina.embeddings(
    docs_t.text,
    model='jina-embeddings-v3',
    task='retrieval.passage',
)
docs_t.add_computed_column(embedding=passage_embedding)
Created table 'documents'.
Added 0 column values with 0 errors.
No rows affected.
# Sample corpus: six short passages on unrelated topics. The same list is
# reused by later cells.
documents = [
    'The Mediterranean diet emphasizes fish, olive oil, and vegetables, believed to reduce chronic diseases.',
    'Photosynthesis in plants converts light energy into glucose and produces essential oxygen.',
    '20th-century innovations, from radios to smartphones, centered on electronic advancements.',
    'Rivers provide water, irrigation, and habitat for aquatic species, vital for ecosystems.',
    "Apple's conference call to discuss fourth fiscal quarter results is scheduled for Thursday, November 2, 2023.",
    "Shakespeare's works, like 'Hamlet' and 'A Midsummer Night's Dream,' endure in literature.",
]

# Wrap each passage in a one-column row dict and insert them in a single call.
docs_t.insert({'text': passage} for passage in documents)
Inserting rows into `documents`: 6 rows [00:00, 1394.00 rows/s]
Inserted 6 rows with 0 errors.
6 rows inserted, 12 values computed.
# View the embeddings: show each document's text next to its computed
# embedding, limiting the preview via head(3).
docs_t.select(docs_t.text, docs_t.embedding).head(3)
Multilingual Embeddings
Jina AI models excel at multilingual text. The same model can embed text
in different languages into the same semantic space.
# Table for the multilingual samples: the text plus a label naming its language.
multilingual_t = pxt.create_table(
    'jina_demo.multilingual',
    {'text': pxt.String, 'language': pxt.String},
)

# Embed every row's text with the same Jina model, here with the
# 'text-matching' task.
text_matching_embedding = jina.embeddings(
    multilingual_t.text,
    model='jina-embeddings-v3',
    task='text-matching',
)
multilingual_t.add_computed_column(embedding=text_matching_embedding)

# (language, text) pairs in four languages, all about organic skincare.
skincare_samples = (
    (
        'English',
        'Organic skincare for sensitive skin with aloe vera and chamomile.',
    ),
    (
        'German',
        'Bio-Hautpflege für empfindliche Haut mit Aloe Vera und Kamille.',
    ),
    (
        'Spanish',
        'Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla.',
    ),
    (
        'Chinese',
        '针对敏感肌专门设计的天然有机护肤产品',
    ),
)

# Turn each pair into a row dict and insert them as one list.
multilingual_t.insert(
    [
        {'text': sample_text, 'language': language_name}
        for language_name, sample_text in skincare_samples
    ]
)

# Show each language label next to its text.
multilingual_t.select(multilingual_t.language, multilingual_t.text).collect()
Created table 'multilingual'.
Added 0 column values with 0 errors.
Inserting rows into `multilingual`: 4 rows [00:00, 736.23 rows/s]
Inserted 4 rows with 0 errors.
Embedding Index for Similarity Search
You can use Jina AI embeddings with Pixeltable’s embedding index for
efficient similarity search.
# Table whose 'text' column will be searchable by similarity.
search_t = pxt.create_table('jina_demo.search', {'text': pxt.String})

# Bind the model and task up front, then register the resulting function as
# the string embedding behind the index on 'text'.
embed_fn = jina.embeddings.using(
    model='jina-embeddings-v3',
    task='retrieval.passage',
)
search_t.add_embedding_index('text', string_embed=embed_fn)

# Load the same sample documents used earlier, one row per passage.
search_t.insert({'text': passage} for passage in documents)
Created table 'search'.
Inserting rows into `search`: 6 rows [00:00, 565.03 rows/s]
Inserted 6 rows with 0 errors.
6 rows inserted, 12 values computed.
# Similarity expression between the indexed 'text' column and the query string.
sim = search_t.text.similarity(
    string='What are the health benefits of Mediterranean food?'
)

# Order by that similarity in descending order and keep three rows ...
top_matches = search_t.order_by(sim, asc=False).limit(3)

# ... then show each matching text together with its similarity as 'score'.
top_matches.select(search_t.text, score=sim).collect()
Reranking
Jina AI’s reranker models can improve search relevance by reordering
results based on semantic similarity to the query.
# Each row pairs a query with a JSON value holding its candidate documents.
# NOTE(review): if_exists='replace' presumably lets this cell be re-run when
# the table already exists -- confirm against the create_table documentation.
rerank_schema = {'query': pxt.String, 'documents': pxt.Json}
rerank_t = pxt.create_table(
    'jina_demo.rerank',
    rerank_schema,
    if_exists='replace',
)

# Describe the rerank call over the two stored columns, requesting the top
# three results with the document text included, then attach it as the
# computed column 'reranked'.
top_reranked = jina.rerank(
    rerank_t.query,
    rerank_t.documents,
    model='jina-reranker-v2-base-multilingual',
    top_n=3,
    return_documents=True,
)
rerank_t.add_computed_column(reranked=top_reranked)

# Insert a single row: one query plus the sample documents as its candidates.
rerank_t.insert(
    query="When is Apple's conference call scheduled?",
    documents=documents,
)
Created table 'rerank'.
Added 0 column values with 0 errors.
Inserting rows into `rerank`: 1 rows [00:00, 543.16 rows/s]
Inserted 1 row with 0 errors.
1 row inserted, 2 values computed.
# View the reranked results: collect the 'reranked' column and display its
# value for the first row (the only row inserted above).
result = rerank_t.select(rerank_t.reranked).collect()
result['reranked'][0]
{'usage': {'total_tokens': 221},
'results': [{'index': 4,
'document': "Apple's conference call to discuss fourth fiscal quarter results is scheduled for Thursday, November 2, 2023.",
'relevance_score': 0.64511991},
{'index': 2,
'document': '20th-century innovations, from radios to smartphones, centered on electronic advancements.',
'relevance_score': 0.03846619},
{'index': 5,
'document': "Shakespeare's works, like 'Hamlet' and 'A Midsummer Night's Dream,' endure in literature.",
'relevance_score': 0.02517884}]}
Learn More