
LangChain

The sie-langchain package (Python) and @sie/langchain package (TypeScript) provide drop-in components for LangChain. Use SIEEmbeddings for vector stores and SIEReranker for document compression.

Install the Python package:
pip install sie-langchain

This installs sie-sdk and langchain-core as dependencies.
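For TypeScript projects, the @sie/langchain package mentioned above can be installed from npm (assuming it is published under that name):

npm install @sie/langchain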

The components expect a running SIE server (the default base_url is http://localhost:8080):
# Docker (recommended)
docker run -p 8080:8080 ghcr.io/superlinked/sie:latest
# Or with GPU
docker run --gpus all -p 8080:8080 ghcr.io/superlinked/sie:latest

SIEEmbeddings implements LangChain’s Embeddings interface. Use it with any vector store.

from sie_langchain import SIEEmbeddings

embeddings = SIEEmbeddings(
    base_url="http://localhost:8080",
    model="BAAI/bge-m3"
)

# Embed documents
vectors = embeddings.embed_documents([
    "Machine learning uses algorithms to learn from data.",
    "The weather is sunny today."
])
print(len(vectors))  # 2

# Embed a query
query_vector = embeddings.embed_query("What is machine learning?")
print(len(query_vector))  # 1024
Plug the embeddings into any LangChain vector store, for example Chroma:

from langchain_chroma import Chroma
from sie_langchain import SIEEmbeddings

embeddings = SIEEmbeddings(model="BAAI/bge-m3")

# Create vector store
vectorstore = Chroma.from_texts(
    texts=["Document one", "Document two"],
    embedding=embeddings
)

# Search
results = vectorstore.similarity_search("query", k=2)

Both sync and async methods are available:

# Sync
vectors = embeddings.embed_documents(texts)
query_vec = embeddings.embed_query(text)

# Async (call from inside a coroutine)
vectors = await embeddings.aembed_documents(texts)
query_vec = await embeddings.aembed_query(text)

SIEReranker implements BaseDocumentCompressor. Use it to rerank retrieved documents.

from langchain_core.documents import Document
from sie_langchain import SIEReranker

reranker = SIEReranker(
    base_url="http://localhost:8080",
    model="jinaai/jina-reranker-v2-base-multilingual",
    top_k=3
)

documents = [
    Document(page_content="Machine learning is a subset of AI."),
    Document(page_content="The weather is sunny today."),
    Document(page_content="Deep learning uses neural networks."),
]

reranked = reranker.compress_documents(documents, "What is ML?")
for doc in reranked:
    score = doc.metadata.get("relevance_score", 0)
    print(f"{score:.3f}: {doc.page_content[:50]}")
Wrap the reranker in a ContextualCompressionRetriever for two-stage retrieval:

from langchain.retrievers import ContextualCompressionRetriever
from sie_langchain import SIEReranker

reranker = SIEReranker(model="jinaai/jina-reranker-v2-base-multilingual", top_k=5)

# vectorstore is the Chroma store from the example above
compression_retriever = ContextualCompressionRetriever(
    base_compressor=reranker,
    base_retriever=vectorstore.as_retriever(search_kwargs={"k": 20})
)

# Retrieves 20 docs, reranks, returns top 5
results = compression_retriever.invoke("What is machine learning?")

Use SIESparseEncoder with SIEEmbeddings for hybrid dense+sparse search.

from langchain_pinecone import PineconeHybridSearchRetriever
from sie_langchain import SIEEmbeddings, SIESparseEncoder

# pinecone_index is an existing Pinecone index object
retriever = PineconeHybridSearchRetriever(
    embeddings=SIEEmbeddings(model="BAAI/bge-m3"),
    sparse_encoder=SIESparseEncoder(model="BAAI/bge-m3"),
    index=pinecone_index
)

results = retriever.invoke("hybrid search query")
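The snippet above assumes pinecone_index already exists. A minimal setup sketch using the Pinecone client (the index name, cloud, and region are placeholders; hybrid dense+sparse queries require a dotproduct index, and 1024 matches the BAAI/bge-m3 dense dimension):

from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key="YOUR_API_KEY")

# Create a dotproduct index sized for BAAI/bge-m3 dense vectors
pc.create_index(
    name="sie-hybrid",        # placeholder index name
    dimension=1024,           # BAAI/bge-m3 dense vector size
    metric="dotproduct",      # required for dense + sparse (hybrid) queries
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)
pinecone_index = pc.Index("sie-hybrid")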

Complete example combining embeddings, reranking, and LLM generation:

from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain.retrievers import ContextualCompressionRetriever
from sie_langchain import SIEEmbeddings, SIEReranker

# 1. Create embeddings and vector store
embeddings = SIEEmbeddings(
    base_url="http://localhost:8080",
    model="BAAI/bge-m3"
)
documents = [
    "Machine learning is a branch of artificial intelligence.",
    "Neural networks are inspired by biological neurons.",
    "Deep learning uses multiple layers of neural networks.",
    "Python is popular for machine learning development.",
]
vectorstore = Chroma.from_texts(texts=documents, embedding=embeddings)

# 2. Create two-stage retriever with reranking
reranker = SIEReranker(
    base_url="http://localhost:8080",
    model="jinaai/jina-reranker-v2-base-multilingual",
    top_k=2
)
retriever = ContextualCompressionRetriever(
    base_compressor=reranker,
    base_retriever=vectorstore.as_retriever(search_kwargs={"k": 10})
)

# 3. Build RAG chain
template = """Answer based on the context:
Context: {context}
Question: {question}"""
prompt = ChatPromptTemplate.from_template(template)
llm = ChatOpenAI(model="gpt-4o-mini")

def format_docs(docs):
    return "\n".join(doc.page_content for doc in docs)

chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# 4. Query
answer = chain.invoke("What is deep learning?")
print(answer)
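The chain is a standard LCEL runnable, so the answer can also be streamed instead of returned in one piece; a minimal usage sketch:

# Stream the generated answer as it is produced
for chunk in chain.stream("What is deep learning?"):
    print(chunk, end="", flush=True)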
SIEEmbeddings parameters:

Parameter      Type    Default                 Description
base_url       str     http://localhost:8080   SIE server URL
model          str     BAAI/bge-m3             Model to use
instruction    str     None                    Instruction prefix for encoding
output_dtype   str     None                    Output dtype: float32, float16, int8, binary
gpu            str     None                    Target GPU type for routing
timeout_s      float   180.0                   Request timeout in seconds
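For example (a sketch; the instruction text, dtype, and timeout below are illustrative values, not required defaults):

from sie_langchain import SIEEmbeddings

embeddings = SIEEmbeddings(
    base_url="http://localhost:8080",
    model="BAAI/bge-m3",
    instruction="Represent this sentence for retrieval:",  # illustrative instruction prefix
    output_dtype="float16",                                # one of: float32, float16, int8, binary
    timeout_s=60.0,
)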
SIEReranker parameters:

Parameter   Type    Default                                      Description
base_url    str     http://localhost:8080                        SIE server URL
model       str     jinaai/jina-reranker-v2-base-multilingual    Reranker model
top_k       int     None                                         Number of documents to return
gpu         str     None                                         Target GPU type for routing
timeout_s   float   180.0                                        Request timeout in seconds
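And for the reranker (a sketch; the gpu value is illustrative and depends on your deployment):

from sie_langchain import SIEReranker

reranker = SIEReranker(
    base_url="http://localhost:8080",
    model="jinaai/jina-reranker-v2-base-multilingual",
    top_k=5,
    gpu="a10g",      # illustrative GPU routing hint
    timeout_s=60.0,
)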