Train a PyTorch Model on Avala Annotations
This guide shows how to pull annotations from Avala and feed them directly into a PyTorch Dataset for model training.

Prerequisites

pip install avala torch torchvision pillow

Export Annotations

from avala import Client
import time

client = Client()  # reads AVALA_API_KEY from the environment

# Create an export job for your project
export = client.exports.create(project="proj_abc123")

# Poll until the job reaches a terminal state. Checking only for
# "completed" would spin forever if the export fails, so we also
# watch for "failed" and enforce an overall deadline.
deadline = time.monotonic() + 600  # 10-minute safety cap
while export.status not in ("completed", "failed"):
    if time.monotonic() > deadline:
        raise TimeoutError(f"Export {export.uid} did not finish in time")
    time.sleep(2)
    export = client.exports.get(export.uid)

if export.status == "failed":
    raise RuntimeError(f"Export {export.uid} failed")

print(f"Download: {export.download_url}")

Load into a PyTorch Dataset

import json
import requests
from pathlib import Path
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T


class AvalaDataset(Dataset):
    """Dataset over an Avala JSON export plus a local image directory.

    Each item is ``(image, target)`` where ``target`` is a dict with
    ``boxes`` (list of ``[x1, y1, x2, y2]``) and ``labels`` (list of label
    names) for every bounding-box annotation on that image.
    """

    def __init__(self, export_path: str, images_dir: str, transform=None):
        """Load the export file and configure the per-image transform.

        Args:
            export_path: Path to the Avala JSON export file.
            images_dir: Directory holding the exported images.
            transform: Optional callable applied to each PIL image; when
                omitted (or falsy) a standard ImageNet-style resize/normalize
                pipeline is used instead.
        """
        with open(export_path) as handle:
            self.annotations = json.load(handle)
        self.images_dir = Path(images_dir)
        # The torchvision default is only built when no transform was given.
        self.transform = transform or self._default_transform()

    @staticmethod
    def _default_transform():
        # 224x224 + ImageNet mean/std — the usual pretrained-backbone input.
        return T.Compose([
            T.Resize((224, 224)),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def __len__(self):
        """Number of annotated images in the export."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return the transformed image and its bounding-box targets."""
        record = self.annotations[idx]
        img = Image.open(self.images_dir / record["file_name"]).convert("RGB")
        img = self.transform(img)

        # Collect [x1, y1, x2, y2] corners and label names for each box.
        boxes, labels = [], []
        for obj in record.get("annotations", []):
            if obj["type"] != "bounding_box":
                continue
            c = obj["coordinates"]
            x1, y1 = c["x"], c["y"]
            boxes.append([x1, y1, x1 + c["width"], y1 + c["height"]])
            labels.append(obj["label"])

        return img, {"boxes": boxes, "labels": labels}


# Usage
def _collate(batch):
    """Keep per-sample variable-length targets as plain lists.

    The default collate function tries to stack every field in the batch,
    which fails when images have different numbers of boxes/labels — a
    custom collate_fn is required for detection-style targets.
    """
    images, targets = zip(*batch)
    return list(images), list(targets)


dataset = AvalaDataset("export.json", "./images")
loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=_collate)

for images, targets in loader:
    # Your training loop here
    pass

End-to-End Script

A complete script that exports from Avala, downloads images, and starts training:
import json
import time
import requests
from pathlib import Path
from avala import Client
from torch.utils.data import DataLoader


def export_and_train(project_uid: str, images_dir: str, poll_interval: float = 2.0):
    """Export annotations from Avala, download them, and run a training loop.

    Args:
        project_uid: UID of the Avala project to export.
        images_dir: Directory containing the project's images.
        poll_interval: Seconds to wait between export-status polls.

    Raises:
        RuntimeError: If the export job finishes in a failed state.
        requests.HTTPError: If downloading the export returns an error status.
    """
    client = Client()

    # 1. Export annotations — watch for "failed" so a broken job
    #    doesn't leave us polling forever.
    export = client.exports.create(project=project_uid)
    while export.status not in ("completed", "failed"):
        time.sleep(poll_interval)
        export = client.exports.get(export.uid)
    if export.status == "failed":
        raise RuntimeError(f"Export {export.uid} failed")

    # 2. Download the export; check the HTTP status and bound the wait.
    resp = requests.get(export.download_url, timeout=60)
    resp.raise_for_status()
    Path("export.json").write_bytes(resp.content)

    # 3. Dataset and dataloader. A custom collate_fn is needed because the
    #    default collate cannot stack per-sample variable-length box lists.
    def collate(batch):
        images, targets = zip(*batch)
        return list(images), list(targets)

    dataset = AvalaDataset("export.json", images_dir)
    loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=collate)

    print(f"Training on {len(dataset)} samples")

    # 4. Training loop (replace with your model)
    for epoch in range(10):
        for batch_idx, (images, targets) in enumerate(loader):
            # model(images, targets)
            pass
        print(f"Epoch {epoch + 1} complete")


if __name__ == "__main__":
    export_and_train("proj_abc123", "./images")
The SDK is currently read-only for datasets. Use the REST API or Mission Control to upload images before exporting annotations. See File Uploads.

Next Steps