Build a Scalable Multimodal Image Search System with Alibaba Cloud OSS Vector Buckets
This guide walks through setting up Alibaba Cloud OSS Vector Buckets, installing the necessary SDKs, uploading an image dataset, creating a vector index, generating embeddings with the Bailian multimodal model, writing vectors, running semantic searches, and visualizing results in a Gradio web UI.
Overview
Alibaba Cloud OSS Vector Buckets provide low-cost, large-scale storage for high-dimensional vectors, suitable for multimodal search, knowledge bases, RAG, and AI agents. This guide shows how to build an image semantic search system using OSS Vector Buckets and the Bailian multimodal embedding model.
Prerequisites
OSS service enabled; AccessKey ID and AccessKey Secret.
Bailian (DashScope) service enabled; API Key.
Python 3.12+.
Install SDKs:
pip install alibabacloud-oss-v2
pip install dashscope

Set credentials as environment variables:
# Bailian API Key
export DASHSCOPE_API_KEY=<your-bailian-api-key>
# OSS credentials
export oss_test_access_key_id=<AccessKey ID>
export oss_test_access_key_secret=<AccessKey Secret>
export oss_test_region=cn-hangzhou   # replace with your bucket's region
export oss_test_account_id=<your-account-ID>
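Before going further, it can help to confirm the variables are actually visible to Python. A minimal sanity check (the variable names are simply the ones used throughout this guide):

import os

required = [
    "DASHSCOPE_API_KEY",
    "oss_test_access_key_id",
    "oss_test_access_key_secret",
    "oss_test_region",
    "oss_test_account_id",
]
# Report any variable that is unset or empty before running the later steps
missing = [name for name in required if not os.getenv(name)]
if missing:
    raise SystemExit(f"Missing environment variables: {', '.join(missing)}")
print("All credentials are set.")

Step 1: Upload Images to OSS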
Upload a local folder of images to a regular OSS bucket so that the embedding model can access them via OSS URLs.
import os
import alibabacloud_oss_v2 as oss
from alibabacloud_oss_v2.models import PutObjectRequest

def create_oss_client():
    cfg = oss.config.load_default()
    cfg.credentials_provider = oss.credentials.StaticCredentialsProvider(
        os.getenv('oss_test_access_key_id'),
        os.getenv('oss_test_access_key_secret')
    )
    cfg.region = os.getenv('oss_test_region')
    return oss.Client(cfg)

def upload_with_uploader(client, bucket_name, local_path, oss_key):
    uploader = client.uploader()
    result = uploader.upload_file(
        filepath=local_path,
        request=PutObjectRequest(bucket=bucket_name, key=oss_key)
    )
    return result

client = create_oss_client()
bucket_name = "your-bucket-name"
local_image_path = "data/photograph/"

files = os.listdir(local_image_path)  # list the directory once, not on every iteration
for i, name in enumerate(files, 1):
    local_path = os.path.join(local_image_path, name)
    oss_key = f"photograph/{name}"
    try:
        res = upload_with_uploader(client, bucket_name, local_path, oss_key)
        print(f"[{i}/{len(files)}] Uploaded {name}, status {res.status_code}")
    except Exception as e:
        print(f"[{i}] Failed {name}: {e}")
Create a Vector Bucket (up to 10 per region) and an index that matches the embedding dimension (1024) and distance metric (cosine).
import os
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors

def create_vector_bucket():
    cfg = oss.config.load_default()
    cfg.credentials_provider = oss.credentials.StaticCredentialsProvider(
        os.getenv('oss_test_access_key_id'),
        os.getenv('oss_test_access_key_secret')
    )
    cfg.region = os.getenv('oss_test_region')
    cfg.account_id = os.getenv('oss_test_account_id')
    client = oss_vectors.Client(cfg)
    bucket_name = "my-test-2"
    client.put_vector_bucket(
        oss_vectors.models.PutVectorBucketRequest(bucket=bucket_name)
    )
    print("Vector bucket created")

def create_vector_index():
    cfg = oss.config.load_default()
    cfg.credentials_provider = oss.credentials.StaticCredentialsProvider(
        os.getenv('oss_test_access_key_id'),
        os.getenv('oss_test_access_key_secret')
    )
    cfg.region = os.getenv('oss_test_region')
    cfg.account_id = os.getenv('oss_test_account_id')
    client = oss_vectors.Client(cfg)
    client.put_vector_index(
        oss_vectors.models.PutVectorIndexRequest(
            bucket="my-test-2",
            index_name="test1",
            dimension=1024,
            data_type='float32',
            distance_metric='cosine',
            metadata={"nonFilterableMetadataKeys": ["key1", "key2"]}
        )
    )
    print("Index created")

create_vector_bucket()
create_vector_index()
Use Bailian's multimodal-embedding-v1 model to convert each image into a 1024-dimensional vector.
import dashscope
from dashscope import MultiModalEmbeddingItemImage

def embed_image(image_url: str) -> list[float]:
    resp = dashscope.MultiModalEmbedding.call(
        model="multimodal-embedding-v1",
        input=[MultiModalEmbeddingItemImage(image=image_url, factor=1.0)]
    )
    return resp.output["embeddings"][0]["embedding"]
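The batch-write step below reads vector records from ./data/data.json, which this guide never shows being built. One way to produce it, assuming each record carries key, data, and metadata fields matching the query results in Step 4, and that the images are reachable at the standard public OSS endpoint (the metadata values here are illustrative):

import json

def build_data_file(image_names: list[str], bucket_name: str,
                    region: str, out_file: str = "./data/data.json"):
    records = []
    for name in image_names:
        # Standard public OSS URL; for a private bucket, substitute a
        # presigned URL as sketched in Step 1.
        url = f"https://{bucket_name}.oss-{region}.aliyuncs.com/photograph/{name}"
        records.append({
            "key": name,                             # used to locate the image later
            "data": {"float32": embed_image(url)},   # 1024-dim embedding
            "metadata": {"city": "hangzhou"},        # illustrative metadata
        })
    with open(out_file, "w") as f:
        json.dump(records, f)

# build_data_file(os.listdir("data/photograph/"), "your-bucket-name", "cn-hangzhou")

With ./data/data.json in place, batch-write the vectors to the index.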
import os, json
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors

def batch_write_vectors(data_file="./data/data.json"):
    cfg = oss.config.load_default()
    cfg.credentials_provider = oss.credentials.StaticCredentialsProvider(
        os.getenv('oss_test_access_key_id'),
        os.getenv('oss_test_access_key_secret')
    )
    cfg.region = os.getenv('oss_test_region')
    cfg.account_id = os.getenv('oss_test_account_id')
    client = oss_vectors.Client(cfg)
    with open(data_file, "r") as f:
        vectors = json.load(f)
    bucket = "my-test-2"
    index = "test1"
    batch_size = 500
    total = 0
    for i in range(0, len(vectors), batch_size):
        batch = vectors[i:i+batch_size]
        result = client.put_vectors(
            oss_vectors.models.PutVectorsRequest(
                bucket=bucket,
                index_name=index,
                vectors=batch
            )
        )
        total += len(batch)
        print(f"Written {total}/{len(vectors)} vectors, status {result.status_code}")

batch_write_vectors()
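After the write completes, you can spot-check a few records by key. This assumes a GetVectors operation exposed as get_vectors / GetVectorsRequest, symmetric with the put_vectors and query_vectors calls used in this guide; confirm the exact names in your SDK version:

def check_vectors(client, bucket: str, index_name: str, keys: list[str]):
    # Hypothetical read-back by key; the vector API documents GetVectors
    # alongside PutVectors and QueryVectors.
    result = client.get_vectors(
        oss_vectors.models.GetVectorsRequest(
            bucket=bucket,
            index_name=index_name,
            keys=keys,
        )
    )
    for vec in result.vectors:
        print(vec.get("key"), vec.get("metadata"))

# check_vectors(client, "my-test-2", "test1", ["cat.jpg"])

Step 4: Semantic Search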
Convert a text query to a vector and query the index. The SDK returns the top‑K most similar images with optional distance and metadata.
import os
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors
import dashscope
from dashscope import MultiModalEmbeddingItemText

def text_to_vector(text: str) -> list[float]:
    resp = dashscope.MultiModalEmbedding.call(
        model="multimodal-embedding-v1",
        input=[MultiModalEmbeddingItemText(text=text, factor=1.0)]
    )
    return resp.output["embeddings"][0]["embedding"]

def query_vectors(query_text: str, top_k: int = 5, filter_body: dict = None):
    cfg = oss.config.load_default()
    cfg.credentials_provider = oss.credentials.StaticCredentialsProvider(
        os.getenv('oss_test_access_key_id'),
        os.getenv('oss_test_access_key_secret')
    )
    cfg.region = os.getenv('oss_test_region')
    cfg.account_id = os.getenv('oss_test_account_id')
    client = oss_vectors.Client(cfg)
    q_vec = {"float32": text_to_vector(query_text)}
    req = oss_vectors.models.QueryVectorsRequest(
        bucket="my-test-2",
        index_name="test1",
        query_vector=q_vec,
        top_k=top_k,
        return_distance=True,
        return_metadata=True,
        filter=filter_body
    )
    result = client.query_vectors(req)
    for i, vec in enumerate(result.vectors, 1):
        print(f"[{i}] Key: {vec.get('key')}, Distance: {vec.get('distance')}, Metadata: {vec.get('metadata')}")

# Example
query_vectors("狗狗")  # "puppy"

Metadata filters can be combined with the $in, $and, and $or operators:
filter_body = {
    "$and": [
        {"city": {"$in": ["hangzhou", "shanghai"]}},
        {"height": {"$in": ["1024"]}}
    ]
}
query_vectors("高楼大厦", filter_body=filter_body)  # "skyscrapers"

Step 5: CLI Shortcut (oss-vectors-embed)
The oss-vectors-embed command wraps the embedding and SDK calls above into single put and query commands.
# Embed all images under the prefix and write them to the index
oss-vectors-embed \
  --account-id <your-account-id> \
  --vectors-region cn-hangzhou \
  put \
  --region cn-hangzhou \
  --vector-bucket-name my-test-2 \
  --index-name test1 \
  --model-id multimodal-embedding-v1 \
  --image "oss://bucket/photograph/*" \
  --filename-as-key

# Query the index by text ("狗狗" means "puppy")
oss-vectors-embed \
  --account-id <your-account-id> \
  --vectors-region cn-hangzhou \
  query \
  --vector-bucket-name my-test-2 \
  --index-name test1 \
  --model-id multimodal-embedding-v1 \
  --text-value "狗狗" \
  --top-k 100

Step 6: Visual Search UI with Gradio
Install Gradio and run a simple web app that accepts a text query, optional metadata filters, and displays the retrieved images.
pip install gradio==5.44.1

# gradio_app.py (simplified)
import os, json, logging
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors
import dashscope, gradio as gr
from PIL import Image

logging.basicConfig(level=logging.INFO)

class Util:
    cfg = oss.config.load_default()
    cfg.credentials_provider = oss.credentials.StaticCredentialsProvider(
        os.getenv('oss_test_access_key_id'),
        os.getenv('oss_test_access_key_secret')
    )
    cfg.region = os.getenv('oss_test_region')
    cfg.account_id = os.getenv('oss_test_account_id')
    client = oss_vectors.Client(cfg)
    bucket = "my-test-2"
    index = "test1"

    @staticmethod
    def embedding(text: str) -> list[float]:
        return dashscope.MultiModalEmbedding.call(
            model="multimodal-embedding-v1",
            input=[dashscope.MultiModalEmbeddingItemText(text=text, factor=1.0)]
        ).output["embeddings"][0]["embedding"]

    @staticmethod
    def query(text, top_k=5, city=None, height=None):
        # Build an optional $and filter from the dropdown selections
        filter_body = None
        sub = []
        if city:
            sub.append({"city": {"$in": city}})
        if height:
            sub.append({"height": {"$in": height}})
        if sub:
            filter_body = {"$and": sub}
        result = Util.client.query_vectors(
            oss_vectors.models.QueryVectorsRequest(
                bucket=Util.bucket,
                index_name=Util.index,
                query_vector={"float32": Util.embedding(text)},
                filter=filter_body,
                top_k=top_k,
                return_distance=True,
                return_metadata=True,
            )
        )
        # Resolve each hit's key back to the local image file for display
        gallery = []
        base_dir = os.path.dirname(__file__)
        for vec in result.vectors:
            img_path = os.path.join(base_dir, "data/photograph/", vec["key"])
            img = Image.open(img_path)
            gallery.append((img, json.dumps(vec)))
        return gallery

with gr.Blocks(title="OSS Demo") as demo:
    with gr.Tab("Image Search"):
        txt = gr.Textbox(label="Query Text", value="狗狗")  # "puppy"
        top = gr.Slider(1, 30, value=10, step=1, label="Top K")  # step=1 keeps top_k an integer
        city = gr.Dropdown(["hangzhou", "shanghai", "beijing"], multiselect=True, label="City")
        height = gr.Dropdown(["1024", "683"], multiselect=True, label="Height")
        btn = gr.Button("Search")
        gallery = gr.Gallery(columns=5)
        btn.click(Util.query, inputs=[txt, top, city, height], outputs=gallery)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)