multilingual-e5-small ONNX for pg_onnx
ONNX-converted version of intfloat/multilingual-e5-small, optimized for pgvector and pg_onnx.
This repository enables native semantic search and embedding inference inside PostgreSQL, using ONNXRuntime and pg_onnx.
It includes:
- `encoder.onnx`: The sentence embedding encoder
- `text_to_embedding.onnx`: A wrapper model for direct text-to-embedding inference
- `tokenizer.onnx`: ONNX-converted tokenizer for fast, portable preprocessing
Files
| File | Purpose | Size |
|---|---|---|
| encoder.onnx | Sentence embedding encoder | ~470 MB |
| text_to_embedding.onnx | Wrapper for direct text-to-embedding inference | ~475 MB |
| tokenizer.onnx | ONNX serialized tokenizer | ~5 MB |
Conversion Details
This model was converted from the original multilingual-e5-small using:
- Hugging Face Transformers for model and tokenizer loading
- ONNX export via `transformers.onnx` and custom scripts
- Tokenizer serialized into ONNX using `tokenizers` and custom conversion logic
All components are designed to run natively in PostgreSQL via pg_onnx, enabling efficient semantic search and embedding inference directly in the database.
Download from Hugging Face
from huggingface_hub import hf_hub_download

# Adjust repo_id if you fork or rename
REPO_ID = "oga5/multilingual-e5-small-pg-onnx"

# Resolve each ONNX artifact through the Hub cache (downloaded on first call).
artifacts = {
    filename: hf_hub_download(repo_id=REPO_ID, filename=filename)
    for filename in ("encoder.onnx", "text_to_embedding.onnx", "tokenizer.onnx")
}

print(
    artifacts["encoder.onnx"],
    artifacts["text_to_embedding.onnx"],
    artifacts["tokenizer.onnx"],
)
Setup
Required libraries:
- `onnxruntime`
- `onnxruntime-extensions` (build with `-DOCOS_ENABLE_SPM_TOKENIZER=ON`)
- `pg_onnx` (v1.23.1b or later)
-- After building pg_onnx, install the pg_onnx extension.
-- IF NOT EXISTS makes the statement safe to re-run; the original was also
-- missing its terminating semicolon.
create extension if not exists pg_onnx;
Usage
-- Register models
-- Import the serialized tokenizer and text-to-embedding ONNX models into
-- pg_onnx under a (name, version) pair. The ortextensions_path option points
-- at the onnxruntime-extensions shared library required by the tokenizer's
-- custom operators.
select pg_onnx_import_model(
    'e5-tok',
    'v1',
    pg_read_binary_file('/PATH/tokenizer.onnx')::bytea,
    '{"ortextensions_path": "libortextensions.so"}'::jsonb,
    'e5 tokenizer'
);

select pg_onnx_import_model(
    'e5-embedding',
    'v1',
    pg_read_binary_file('/PATH/text_to_embedding.onnx')::bytea,
    '{"ortextensions_path": "libortextensions.so"}'::jsonb,
    'e5 text to embedding'
);
-- Create functions
-- NOTE(review): the original snippet collapsed several CREATE FUNCTION
-- statements into a single broken $$ body; reconstructed below as the four
-- functions the rest of the README calls (e5_tok, e5_embedding,
-- e5_embedding_passage, e5_embedding_query).

-- e5_tok: tokenize text into model token ids (handy for debugging and
-- checking input length). Returns the tokenizer session's 'tokens' array
-- as integer[].
create or replace function e5_tok(input_text text)
returns integer[]
as $$
    select array_agg(value::int)
    from jsonb_array_elements_text(
        pg_onnx_execute_session(
            'e5-tok',
            'v1',
            jsonb_build_object('inputs', jsonb_build_array(input_text))
        ) -> 'tokens'
    );
$$ language sql immutable;

-- e5_embedding: run the text_to_embedding model on one string and return a
-- 384-dimension pgvector value (first row of the session's 'embedding' output).
create or replace function e5_embedding(input_text text)
returns vector(384)
as $$
    select array(
        select jsonb_array_elements_text(
            pg_onnx_execute_session(
                'e5-embedding',
                'v1',
                jsonb_build_object('text', jsonb_build_array(input_text))
            ) -> 'embedding' -> 0
        )::float
    )::vector(384);
$$ language sql immutable;

-- E5 models expect a task prefix on every input:
--   "passage: " for documents being indexed, "query: " for search queries.
create or replace function e5_embedding_passage(input_text text)
returns vector(384)
as $$
    select e5_embedding('passage: ' || input_text);
$$ language sql immutable;

create or replace function e5_embedding_query(input_text text)
returns vector(384)
as $$
    -- Fixed: original used 'query ' without the colon, which the E5 model
    -- card documents as the required "query: " prefix.
    select e5_embedding('query: ' || input_text);
$$ language sql immutable;
-- Create sample data
-- Demo corpus: 20 short sentences plus a 384-dimension embedding column
-- (matches the vector(384) returned by the embedding functions above).
create table llm_test (
i integer not null primary key,
txt text,
v vector(384)
);
-- HNSW index using inner-product ops; E5 embeddings are presumably
-- unit-normalized so inner-product ordering matches cosine similarity --
-- TODO confirm normalization happens inside text_to_embedding.onnx.
create index llm_test_v_idx on llm_test using hnsw (v vector_ip_ops);
-- Seed the demo corpus in one multi-row insert. The column i is an integer,
-- so pass integer literals directly (the original quoted them as strings and
-- relied on implicit type coercion).
insert into llm_test (i, txt) values
(1, 'Machine learning is a subfield of artificial intelligence'),
(2, 'A database is a system for managing data'),
(3, 'PostgreSQL is a powerful open-source database'),
(4, 'Vector search retrieves results by computing similarity'),
(5, 'ONNX is a standard format for machine learning models'),
(6, 'Natural language processing is a technology for handling text'),
(7, 'Embeddings convert text into vectors'),
(8, 'Cosine similarity measures similarity between vectors'),
(9, 'A tokenizer splits text into tokens'),
(10, 'Transformers are a modern neural network architecture'),
(11, 'SQL is a language for manipulating databases'),
(12, 'Indexes improve query performance'),
(13, 'pgvector is a vector extension for PostgreSQL'),
(14, 'Semantic search retrieves based on meaning'),
(15, 'Neural networks mimic the structure of the brain'),
(16, 'Deep learning uses multi-layer neural networks'),
(17, 'Batch processing handles multiple data at once'),
(18, 'Model inference performs prediction with a trained model'),
(19, 'Fine-tuning adapts an existing model to a specific task'),
(20, 'A cross-encoder evaluates the relevance between two texts');
-- register embeddings
-- Intentionally no WHERE clause: compute an embedding for every row.
-- The "passage: " task prefix is applied inside e5_embedding_passage.
update llm_test set v = e5_embedding_passage(txt);
-- Search
-- Embed the query string once (materialized CTE so the model runs a single
-- time), then rank every row by inner-product distance. In pgvector, <#>
-- returns the NEGATED inner product, so ascending order puts the best
-- matches first.
with question as (
    select 'What is machine learning?' as query
),
query_vec as materialized (
    select e5_embedding_query(question.query) as v
    from question
)
select
    i,
    txt,
    t.v <#> query_vec.v as distance
from llm_test as t
cross join query_vec
order by distance;
License
This project is released under the MIT License. See the full text in LICENSE.
This repository redistributes the original model weights from intfloat/multilingual-e5-small without modification. The original MIT license is retained as required.
Credits
- Original model: intfloat/multilingual-e5-small
- Conversion to ONNX and packaging: oga5