Build a Scalable Multimodal Image Search with Alibaba Cloud OSS Vector Buckets

This guide walks through setting up Alibaba Cloud OSS Vector Buckets, installing the necessary SDKs, uploading image datasets, creating vector indexes, generating embeddings with the Bailian multimodal model, writing vectors, performing semantic searches, and visualizing results via a Gradio web UI.

Alibaba Cloud Infrastructure
Alibaba Cloud Infrastructure
Alibaba Cloud Infrastructure
Build a Scalable Multimodal Image Search with Alibaba Cloud OSS Vector Buckets

Overview

Alibaba Cloud OSS Vector Bucket provides low‑cost, large‑scale storage for high‑dimensional vectors, suitable for multimodal search, knowledge bases, RAG, and AI agents. This guide shows how to build an image semantic search system using OSS Vector Buckets and the Bailian multimodal embedding model.

Prerequisites

OSS service enabled; AccessKey ID and AccessKey Secret.

Bailian (DashScope) service enabled; API Key.

Python 3.12+.

Install SDKs:

pip install alibabacloud-oss-v2
pip install dashscope

Set credentials as environment variables:

# Bailian (DashScope) API Key
export DASHSCOPE_API_KEY=<your-bailian-api-key>
# OSS credentials
export oss_test_access_key_id=<AccessKey ID>
export oss_test_access_key_secret=<AccessKey Secret>
export oss_test_region=<cn-hangzhou>
export oss_test_account_id=<your-account-ID>

Step 1: Upload Images to OSS

Upload a local folder of images to a regular OSS bucket so that the embedding model can access them via OSS URLs.

import os
import alibabacloud_oss_v2 as oss
from alibabacloud_oss_v2.models import PutObjectRequest

def create_oss_client():
    """Build an OSS client authenticated from the oss_test_* environment variables."""
    key_id = os.getenv('oss_test_access_key_id')
    key_secret = os.getenv('oss_test_access_key_secret')
    config = oss.config.load_default()
    config.credentials_provider = oss.credentials.StaticCredentialsProvider(key_id, key_secret)
    config.region = os.getenv('oss_test_region')
    return oss.Client(config)

def upload_with_uploader(client, bucket_name, local_path, oss_key):
    """Upload one local file to `bucket_name` under object key `oss_key`.

    Returns the SDK upload result (carries `status_code`).
    """
    request = PutObjectRequest(bucket=bucket_name, key=oss_key)
    return client.uploader().upload_file(filepath=local_path, request=request)

client = create_oss_client()
bucket_name = "your-bucket-name"
local_image_path = "data/photograph/"
# List the directory exactly once: the original called os.listdir() again on
# every iteration just to compute the total, re-reading the directory N+1
# times (and risking an inconsistent count if files are added mid-run).
image_names = os.listdir(local_image_path)
total = len(image_names)
for i, name in enumerate(image_names, 1):
    local_path = os.path.join(local_image_path, name)
    oss_key = f"photograph/{name}"
    try:
        res = upload_with_uploader(client, bucket_name, local_path, oss_key)
        print(f"[{i}/{total}] Uploaded {name}, status {res.status_code}")
    except Exception as e:
        # Best-effort batch upload: report the failure and continue.
        print(f"[{i}] Failed {name}: {e}")

Step 2: Create Vector Bucket and Index

Create a Vector Bucket (up to 10 per region) and an index that matches the embedding dimension (1024) and distance metric (cosine).

import os
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors

def create_vector_bucket():
    """Create the demo Vector Bucket (up to 10 per region are allowed)."""
    bucket_name = "my-test-2"
    cfg = oss.config.load_default()
    cfg.credentials_provider = oss.credentials.StaticCredentialsProvider(
        os.getenv('oss_test_access_key_id'),
        os.getenv('oss_test_access_key_secret'),
    )
    cfg.region = os.getenv('oss_test_region')
    cfg.account_id = os.getenv('oss_test_account_id')
    request = oss_vectors.models.PutVectorBucketRequest(bucket=bucket_name)
    oss_vectors.Client(cfg).put_vector_bucket(request)
    print("Vector bucket created")

def create_vector_index():
    """Create index "test1" on bucket "my-test-2": 1024-dim float32, cosine metric.

    The dimension must match the embedding model's output (1024 here).
    """
    cfg = oss.config.load_default()
    cfg.credentials_provider = oss.credentials.StaticCredentialsProvider(
        os.getenv('oss_test_access_key_id'),
        os.getenv('oss_test_access_key_secret'),
    )
    cfg.region = os.getenv('oss_test_region')
    cfg.account_id = os.getenv('oss_test_account_id')
    client = oss_vectors.Client(cfg)
    # Keys listed under nonFilterableMetadataKeys are stored with vectors but
    # presumably cannot be used in query filters — confirm against SDK docs.
    request = oss_vectors.models.PutVectorIndexRequest(
        bucket="my-test-2",
        index_name="test1",
        dimension=1024,
        data_type='float32',
        distance_metric='cosine',
        metadata={"nonFilterableMetadataKeys": ["key1", "key2"]},
    )
    client.put_vector_index(request)
    print("Index created")

Step 3: Generate and Write Vectors

Use Bailian's multimodal-embedding-v1 model to convert each image into a 1024-dimensional vector.

import dashscope
from dashscope import MultiModalEmbeddingItemImage

def embed_image(image_url: str) -> list[float]:
    """Return the embedding vector for the image at `image_url`.

    Raises:
        RuntimeError: if the DashScope call does not return embeddings,
            instead of the opaque TypeError the unchecked subscript produced.
    """
    resp = dashscope.MultiModalEmbedding.call(
        model="multimodal-embedding-v1",
        input=[MultiModalEmbeddingItemImage(image=image_url, factor=1.0)]
    )
    # NOTE(review): on API errors the response's output appears to be empty/None;
    # surface the full response rather than crashing on the subscript below.
    if not resp.output:
        raise RuntimeError(f"embedding call failed: {resp}")
    return resp.output["embeddings"][0]["embedding"]

Batch‑write vectors to the index.

import os, json
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors

def batch_write_vectors(data_file="./data/data.json"):
    """Write pre-computed vectors from `data_file` into index "test1" in batches.

    Args:
        data_file: path to a JSON file holding a list of vector records in the
            PutVectors payload shape (key, data, metadata).
    """
    cfg = oss.config.load_default()
    cfg.credentials_provider = oss.credentials.StaticCredentialsProvider(
        os.getenv('oss_test_access_key_id'),
        os.getenv('oss_test_access_key_secret')
    )
    cfg.region = os.getenv('oss_test_region')
    cfg.account_id = os.getenv('oss_test_account_id')
    client = oss_vectors.Client(cfg)

    # Read as UTF-8 explicitly: the platform default encoding is not
    # guaranteed to be UTF-8, and the metadata may contain non-ASCII text.
    with open(data_file, "r", encoding="utf-8") as f:
        vectors = json.load(f)

    bucket = "my-test-2"
    index = "test1"
    batch_size = 500  # write in chunks to keep each request small
    total = 0
    for i in range(0, len(vectors), batch_size):
        batch = vectors[i:i+batch_size]
        result = client.put_vectors(
            oss_vectors.models.PutVectorsRequest(
                bucket=bucket,
                index_name=index,
                vectors=batch
            )
        )
        total += len(batch)
        print(f"Written {total}/{len(vectors)} vectors, status {result.status_code}")

batch_write_vectors()

Step 4: Semantic Search

Convert a text query to a vector and query the index. The SDK returns the top‑K most similar images with optional distance and metadata.

import os
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors
import dashscope
from dashscope import MultiModalEmbeddingItemText

def text_to_vector(text: str) -> list[float]:
    """Embed a text query with multimodal-embedding-v1 and return its vector."""
    item = MultiModalEmbeddingItemText(text=text, factor=1.0)
    resp = dashscope.MultiModalEmbedding.call(
        model="multimodal-embedding-v1",
        input=[item]
    )
    embeddings = resp.output["embeddings"]
    return embeddings[0]["embedding"]

def query_vectors(query_text: str, top_k: int = 5, filter_body: dict | None = None):
    """Embed `query_text` and print the top_k nearest vectors from index "test1".

    Args:
        query_text: natural-language query (any language the model supports).
        top_k: number of nearest neighbours to return.
        filter_body: optional metadata filter; supports $in/$and/$or operators.
            (Annotation fixed from the misleading `dict = None`.)
    """
    cfg = oss.config.load_default()
    cfg.credentials_provider = oss.credentials.StaticCredentialsProvider(
        os.getenv('oss_test_access_key_id'),
        os.getenv('oss_test_access_key_secret')
    )
    cfg.region = os.getenv('oss_test_region')
    cfg.account_id = os.getenv('oss_test_account_id')
    client = oss_vectors.Client(cfg)

    # Query vectors are wrapped in a {"float32": [...]} payload per the index's data_type.
    q_vec = {"float32": text_to_vector(query_text)}
    req = oss_vectors.models.QueryVectorsRequest(
        bucket="my-test-2",
        index_name="test1",
        query_vector=q_vec,
        top_k=top_k,
        return_distance=True,
        return_metadata=True,
        filter=filter_body
    )
    result = client.query_vectors(req)
    for i, vec in enumerate(result.vectors, 1):
        print(f"[{i}] Key: {vec.get('key')}, Distance: {vec.get('distance')}, Metadata: {vec.get('metadata')}")

# Example
query_vectors("狗狗")

Metadata filters can be combined with $in, $and, $or operators.

# Combine metadata predicates: a hit must match one of the listed cities
# AND have height "1024".
city_filter = {"city": {"$in": ["hangzhou", "shanghai"]}}
height_filter = {"height": {"$in": ["1024"]}}
filter_body = {"$and": [city_filter, height_filter]}
query_vectors("高楼大厦", filter_body=filter_body)

Step 5: CLI Shortcut (oss‑vectors‑embed)

The oss-vectors-embed command wraps the SDK calls.

oss-vectors-embed \
  --account-id <your-account-id> \
  --vectors-region cn-hangzhou \
  put \
  --region cn-hangzhou \
  --vector-bucket-name my-test-2 \
  --index-name test1 \
  --model-id multimodal-embedding-v1 \
  --image "oss://bucket/photograph/*" \
  --filename-as-key
oss-vectors-embed \
  --account-id <your-account-id> \
  --vectors-region cn-hangzhou \
  query \
  --vector-bucket-name my-test-2 \
  --index-name test1 \
  --model-id multimodal-embedding-v1 \
  --text-value "狗狗" \
  --top-k 100

Step 6: Visual Search UI with Gradio

Install Gradio and run a simple web app that accepts a text query, optional metadata filters, and displays the retrieved images.

pip install gradio==5.44.1
# gradio_app.py (simplified)
import os, json, logging
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors
import dashscope, gradio as gr
from PIL import Image

logging.basicConfig(level=logging.INFO)

class Util:
    """Shared OSS Vectors client plus embedding/search helpers for the Gradio UI."""

    # Client is built once, at class-definition time, from the oss_test_* env vars.
    cfg = oss.config.load_default()
    cfg.credentials_provider = oss.credentials.StaticCredentialsProvider(
        os.getenv('oss_test_access_key_id'),
        os.getenv('oss_test_access_key_secret')
    )
    cfg.region = os.getenv('oss_test_region')
    cfg.account_id = os.getenv('oss_test_account_id')
    client = oss_vectors.Client(cfg)
    bucket = "my-test-2"   # vector bucket name
    index = "test1"        # vector index name

    @staticmethod
    def embedding(text: str) -> list[float]:
        """Return the embedding vector for `text` via multimodal-embedding-v1."""
        return dashscope.MultiModalEmbedding.call(
            model="multimodal-embedding-v1",
            input=[dashscope.MultiModalEmbeddingItemText(text=text, factor=1.0)]
        ).output["embeddings"][0]["embedding"]

    @staticmethod
    def query(text, top_k=5, city=None, height=None):
        """Run a semantic search and return a list of (PIL image, caption) pairs.

        Args:
            text: query string.
            top_k: number of hits to retrieve.
            city: optional list of city values to filter on ($in).
            height: optional list of height values to filter on ($in).
        """
        sub = []
        if city:
            sub.append({"city": {"$in": city}})
        if height:
            sub.append({"height": {"$in": height}})
        filter_body = {"$and": sub} if sub else None
        result = Util.client.query_vectors(
            oss_vectors.models.QueryVectorsRequest(
                bucket=Util.bucket,
                index_name=Util.index,
                query_vector={"float32": Util.embedding(text)},
                filter=filter_body,
                top_k=top_k,
                return_distance=True,
                return_metadata=True,
            )
        )
        gallery = []
        base_dir = os.path.dirname(__file__)
        for vec in result.vectors:
            img_path = os.path.join(base_dir, "data/photograph/", vec["key"])
            # Load eagerly and close the file: the original left every
            # Image.open() handle dangling, leaking one file descriptor per hit.
            with Image.open(img_path) as img:
                gallery.append((img.copy(), json.dumps(vec)))
        return gallery

# Minimal single-tab UI: text query, Top-K slider, optional metadata filters;
# hits are rendered in a five-column image gallery.
with gr.Blocks(title="OSS Demo") as demo:
    with gr.Tab("Image Search"):
        query_box = gr.Textbox(label="Query Text", value="狗狗")
        top_k_slider = gr.Slider(1, 30, value=10, label="Top K")
        city_filter = gr.Dropdown(["hangzhou", "shanghai", "beijing"], multiselect=True, label="City")
        height_filter = gr.Dropdown(["1024", "683"], multiselect=True, label="Height")
        search_btn = gr.Button("Search")
        result_gallery = gr.Gallery(columns=5)
        search_btn.click(
            Util.query,
            inputs=[query_box, top_k_slider, city_filter, height_filter],
            outputs=result_gallery,
        )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
Tags: vector search, cloud storage, OSS, multimodal embedding, Python SDK, AI retrieval, Gradio UI
Alibaba Cloud Infrastructure
Written by

Alibaba Cloud Infrastructure

For uninterrupted computing services

0 followers
Reader feedback

How this landed with the community

Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.