In this lab we’ll build a small, clean, and reproducible dataset from end to end. You’ll practice sourcing images from public repositories, labeling efficiently with Label Studio (assisted by SAM2), running a pre-processing pipeline (cleaning, stratified splits, leakage checks, augmentations, class-imbalance fixes), and generating synthetic data (RGB/Depth/Seg) with Unreal Engine 4.27 + UnrealCV, converting it to YOLO format with light domain randomization.
As your homework, you’ll submit 50 labeled samples from your dataset.
```bash
# Create & activate virtual environment
python -m venv .venv
# macOS/Linux
source .venv/bin/activate
# Windows (PowerShell)
.\.venv\Scripts\Activate.ps1

pip install --upgrade pip
pip install label-studio label-studio-ml albumentations==1.4.8 opencv-python scikit-learn numpy matplotlib tqdm pycocotools
```
We’ll use a simple, reproducible folder layout to avoid path chaos later.
```
data/
  raw/                      # downloaded/unfiltered originals
  clean/                    # cleaned + deduplicated images
  labels/
    labels/                 # YOLO .txt files (mirrors images by name)
    images/                 # LS export (for traceability)
    classes.txt             # class names
    notes.json              # class info
  augmentations/
    labels/
    images/
    classes.txt
    notes.json
  synth/
    rgb/
    depth/                  # not used
    seg/
    labels/
  split/
    train/{images,labels}
    val/{images,labels}
    test/{images,labels}
  hw_submission/
scripts/
  01_clean_and_dedup.py
  02_label_studio.sh
  03_train_test_split.py
  04_augment_yolo.py
  05_unreal_capture.py
.venv/
```
Keep class names consistent across `labels.json`, your exports, and `classes.txt`: pick one canonical form per class (e.g., `car`, never `Car`).
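Inconsistent casing is the most common labeling bug. A tiny sketch of a normalization helper — the alias map is hypothetical, so extend it to match your own taxonomy:

```python
# Sketch: canonicalize class names before writing labels.json / classes.txt.
# ALIASES is a hypothetical example map, not part of the lab skeleton.
ALIASES = {"Car": "car", "cars": "car", "Pedestrian": "pedestrian"}

def normalize(name: str) -> str:
    name = name.strip()
    return ALIASES.get(name, name.lower())

assert normalize("Car") == normalize("car") == "car"
```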
Goal: assemble 100–300 raw images for 1–5 target classes. You can mix sources (Kaggle, Open Images, Roboflow Universe, academic repos).
Suggested sources (with licenses): Kaggle, Open Images, Roboflow Universe, and academic repositories — always check and record the license before downloading.

Checklist:
- Log every source URL and its license in your manifest.
- Summarize classes, sources, and licensing in a `DATA_CARD.md`.
```bash
mkdir -p data/raw
# Example: use dataset-specific CLI or curl/wget; store your manifest!
```
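The manifest can be a plain CSV you append to as you download. A minimal sketch — the schema (`filename,source_url,license,downloaded_at`) is a suggestion, not prescribed by the lab:

```python
# Sketch: append one manifest row per downloaded file (hypothetical schema).
import csv, os, datetime

MANIFEST = "data/raw/manifest.csv"

def log_download(filename: str, source_url: str, license_name: str) -> None:
    new = not os.path.exists(MANIFEST)
    with open(MANIFEST, "a", newline="") as f:
        w = csv.writer(f)
        if new:  # write the header once
            w.writerow(["filename", "source_url", "license", "downloaded_at"])
        w.writerow([filename, source_url, license_name,
                    datetime.date.today().isoformat()])

log_download("0001.jpg", "https://example.com/0001.jpg", "CC-BY-4.0")
```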
Start Label Studio locally and create a project with your taxonomy (e.g., car, pedestrian). Use boxes or polygons.
```bash
label-studio start
# open http://localhost:8080
```
Steps:
1. Create a project → define labels (consistent singular names).
2. Import images from `data/raw/` (or `data/clean/` after Part 3.1 if you prefer).
3. (Optional) Connect a Label Studio ML backend using SAM2 for assisted masks.
4. Label a subset thoroughly.
5. Export as YOLO (or COCO) to `data/labels/labelstudio_export/`.
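After step 5, it’s worth validating the export before moving on. A minimal sketch, assuming the YOLO export landed in `data/labels/labelstudio_export/` with a `classes.txt` and a `labels/` folder:

```python
# Sketch: sanity-check a Label Studio YOLO export.
# Assumes the export layout used in this lab: classes.txt + labels/*.txt.
import glob, os

EXPORT = "data/labels/labelstudio_export"
classes = [c.strip() for c in open(os.path.join(EXPORT, "classes.txt")) if c.strip()]
bad = []
for lp in glob.glob(os.path.join(EXPORT, "labels", "*.txt")):
    for n, line in enumerate(open(lp), 1):
        parts = line.split()
        if not parts:
            continue
        cid = int(parts[0])
        coords = list(map(float, parts[1:]))
        # class id must index classes.txt; YOLO coords are normalized to [0, 1]
        if cid >= len(classes) or len(coords) != 4 or not all(0.0 <= v <= 1.0 for v in coords):
            bad.append((lp, n))
print(f"{len(classes)} classes, {len(bad)} malformed label lines")
```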
Remove corrupt files and near-duplicates to mitigate leakage and bias.
```python
# scripts/01_clean_and_dedup.py
import cv2, os, hashlib, glob, shutil
from tqdm import tqdm

SRC, DST = "data/raw", "data/clean"
os.makedirs(DST, exist_ok=True)

seen = set()
for p in tqdm(glob.glob(f"{SRC}/**/*.*", recursive=True)):
    if not p.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
        continue
    img = cv2.imread(p)
    if img is None:  # drop corrupt/unreadable files
        continue
    # Re-encode to JPEG and hash the bytes: catches exact duplicates even
    # when the originals differ only in metadata or container format.
    ok, buf = cv2.imencode(".jpg", img, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
    if not ok:
        continue
    h = hashlib.md5(buf).hexdigest()
    if h in seen:
        continue
    seen.add(h)
    shutil.copy2(p, os.path.join(DST, os.path.basename(p)))
print("Kept:", len(seen))
```
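The exact-hash pass above misses *near*-duplicates (resized or recompressed copies). A minimal sketch of an average-hash (aHash) pass on top of it — the 5-bit threshold is a guess to tune, not a lab requirement:

```python
# Sketch: aHash near-duplicate detection over the cleaned images.
import cv2, glob
import numpy as np

def ahash(img, size=8):
    # Downscale to size x size grayscale, threshold at the mean -> 64-bit signature.
    g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    g = cv2.resize(g, (size, size), interpolation=cv2.INTER_AREA)
    return (g > g.mean()).flatten()

hashes = {}
for p in glob.glob("data/clean/*.*"):
    img = cv2.imread(p)
    if img is None:
        continue
    h = ahash(img)
    dup = next((q for q, hq in hashes.items()
                if int(np.count_nonzero(h != hq)) <= 5), None)  # <=5 of 64 bits differ
    if dup:
        print("near-duplicate:", p, "~", dup)
    else:
        hashes[p] = h
```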
Export labels from Label Studio in YOLO format. If Label Studio isn’t already running, relaunch it:

```bash
label-studio start
```
Example Labeling Interface config for use with SAM2:
```xml
<View>
  <Style>
    .main { font-family: Arial, sans-serif; margin: 0; }
    .container { display: flex; justify-content: space-between; gap: 16px; margin-bottom: 20px; }
    .column { flex: 1; background-color: #fff; box-shadow: 0 2px 5px rgba(0,0,0,0.1); text-align: left; border-radius: 10px; }
    .label { padding: 12px; }
    .image-container { width: 100%; height: 300px; background-color: #ddd; }
  </Style>
  <View className="main">
    <View className="container">
      <View className="column">
        <View className="label">
          <Header value="Brush Labels"/>
          <BrushLabels name="tag" toName="image">
            <Label value="bone" background="#3584e4"/>
            <Label value="dog" background="#e01b24"/>
          </BrushLabels>
        </View>
      </View>
      <View className="column">
        <View className="label">
          <Header value="Keypoint Labels"/>
          <KeyPointLabels name="tag2" toName="image" smart="true">
            <Label value="bone" background="#77767b"/>
            <Label value="dog" background="#77767b"/>
          </KeyPointLabels>
        </View>
      </View>
      <View className="column">
        <View className="label">
          <Header value="Rectangle Labels"/>
          <RectangleLabels name="tag3" toName="image" smart="true">
            <Label value="bone" background="#3d3846"/>
            <Label value="dog" background="#3d3846"/>
          </RectangleLabels>
        </View>
      </View>
    </View>
    <View className="image-container">
      <Image name="image" value="$image" zoom="true" zoomControl="true"/>
    </View>
  </View>
</View>
```
With this interface, SAM2-assisted predictions are stored as brush labels (segmentation masks); the smart keypoint and rectangle tools act as prompts for the model.
Stratify by the labels present in each image; if you know the source or scene, use a group split so images from the same scene never end up in different splits (a sketch follows the script below).
```python
# scripts/03_train_test_split.py
from sklearn.model_selection import train_test_split
import os, glob, shutil

IMAGES = "data/clean"
LABELS = "data/labels/yolo/labels"
OUT = "data/split"

def label_count(lbl):
    try:
        return sum(1 for _ in open(lbl, "r", encoding="utf-8"))
    except OSError:
        return 0

imgs = sorted(p for p in glob.glob(f"{IMAGES}/*.*")
              if p.lower().endswith((".jpg", ".jpeg", ".png")))
X, y = [], []
for ip in imgs:
    lp = os.path.join(LABELS, os.path.splitext(os.path.basename(ip))[0] + ".txt")
    X.append((ip, lp))
    # Stratify on "has at least one object" vs "background-only"
    y.append(0 if not os.path.exists(lp) or label_count(lp) == 0 else 1)

def dump(split, items):
    for sub in ["images", "labels"]:
        os.makedirs(f"{OUT}/{split}/{sub}", exist_ok=True)
    for ip, lp in items:
        shutil.copy2(ip, f"{OUT}/{split}/images/{os.path.basename(ip)}")
        dst = f"{OUT}/{split}/labels/{os.path.splitext(os.path.basename(ip))[0]}.txt"
        if os.path.exists(lp):
            shutil.copy2(lp, dst)
        else:
            open(dst, "w").close()  # empty label file if no objects

# 70/15/15 train/val/test, stratified
X_tr, X_tmp, y_tr, y_tmp = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
X_va, X_te, y_va, y_te = train_test_split(X_tmp, y_tmp, test_size=0.5, stratify=y_tmp, random_state=42)
dump("train", X_tr); dump("val", X_va); dump("test", X_te)
print("Split done.")
```
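If your images come from a handful of scenes or source videos, the stratified split above can still leak near-identical frames across splits. A minimal group-aware sketch — `scene_of()` is a hypothetical helper you'd adapt to your filenames or manifest:

```python
# Sketch: group-aware split so all images from one scene stay in one split.
import os
from sklearn.model_selection import GroupShuffleSplit

def scene_of(path: str) -> str:
    # Hypothetical convention: "scene03_0001.jpg" -> "scene03"
    return os.path.basename(path).split("_")[0]

def group_split(items, test_size=0.3, seed=42):
    # items: list of (image_path, label_path) pairs, as built in the script above
    groups = [scene_of(ip) for ip, _ in items]
    gss = GroupShuffleSplit(n_splits=1, test_size=test_size, random_state=seed)
    tr, te = next(gss.split(items, groups=groups))
    return [items[i] for i in tr], [items[i] for i in te]
```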
Use label-preserving transforms to increase diversity. Target minority classes for class-balanced sampling.
```python
# scripts/04_augment_yolo.py
import albumentations as A
import cv2, os, glob

AUG = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.35),
    A.ColorJitter(p=0.3),
    A.MotionBlur(blur_limit=5, p=0.2),
    # translate_percent is a (min, max) range; use a symmetric range for jitter
    A.Affine(scale=(0.5, 1.5), translate_percent=(-0.2, 0.2), rotate=(-5, 5), p=0.5),
], bbox_params=A.BboxParams(format="yolo", label_fields=["cls"]))

IN_IMG = "data/split/train/images"
IN_LBL = "data/split/train/labels"
OUT_IMG = "data/split/train/images_aug"
OUT_LBL = "data/split/train/labels_aug"
os.makedirs(OUT_IMG, exist_ok=True); os.makedirs(OUT_LBL, exist_ok=True)

def read_yolo(lbl_path):
    bxs, cls = [], []
    if os.path.exists(lbl_path):
        for line in open(lbl_path, "r").read().strip().splitlines():
            if not line:
                continue
            c, xc, yc, ww, hh = line.split()
            bxs.append([float(xc), float(yc), float(ww), float(hh)])
            cls.append(int(c))
    return bxs, cls

for ip in glob.glob(f"{IN_IMG}/*.*"):
    if not ip.lower().endswith((".jpg", ".jpeg", ".png")):
        continue
    base = os.path.splitext(os.path.basename(ip))[0]
    lp = os.path.join(IN_LBL, base + ".txt")
    img = cv2.imread(ip)
    if img is None:
        continue
    bxs, cls = read_yolo(lp)
    # Images without boxes are still augmented; an empty label file is written.
    out = AUG(image=img, bboxes=bxs, cls=cls)
    aug_img, aug_bxs, aug_cls = out["image"], out["bboxes"], out["cls"]
    cv2.imwrite(os.path.join(OUT_IMG, base + "_aug.jpg"), aug_img)
    with open(os.path.join(OUT_LBL, base + "_aug.txt"), "w") as f:
        for c, (xc, yc, ww, hh) in zip(aug_cls, aug_bxs):
            f.write(f"{c} {xc:.6f} {yc:.6f} {ww:.6f} {hh:.6f}\n")
```
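The script above augments every training image once; the class-balancing mentioned earlier is left to you. One minimal sketch: count instances per class, then repeat images containing rare classes more often. `oversample_factor()` is a hypothetical policy, not part of the lab skeleton:

```python
# Sketch: per-class instance counts to drive class-balanced oversampling.
import glob, os
from collections import Counter

def class_counts(label_dir: str) -> Counter:
    counts = Counter()
    for lp in glob.glob(os.path.join(label_dir, "*.txt")):
        for line in open(lp):
            if line.strip():
                counts[int(line.split()[0])] += 1
    return counts

def oversample_factor(classes_in_image, counts, target=None):
    # Repeat an image proportionally to how rare its rarest class is.
    if not classes_in_image:
        return 1
    target = target or max(counts.values())
    rarest = min(counts[c] for c in classes_in_image)
    return max(1, round(target / max(rarest, 1)))

counts = class_counts("data/split/train/labels")
print(counts)  # inspect imbalance before deciding how hard to oversample
```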
We’ll render small synthetic bursts to cover rare poses/backgrounds. You’ll need UnrealCV enabled and a minimal scene.
Domain randomization checklist: vary camera pose (distance, yaw, pitch, roll), target position and rotation, and the backgrounds used for composites — the capture script below randomizes all three.
```python
# scripts/05_unreal_capture.py
# UnrealCV dataset capture
# Outputs to: data/synth/{rgb, seg, labels} with optional backgrounds in data/backgrounds
# - Picks a camera that actually moves (A/B diff test)
# - Resolves target actor by prefix
# - Colors target UNIQUE_RGB to isolate it in /object_mask
# - Spherical camera sampling + look-at
# - YOLO labels from binary mask
from __future__ import annotations
import os, sys, time, math, random, re
import numpy as np
import cv2
from PIL import Image

# ======== USER CONFIG (minimal) ========
PORT = 9005
TARGET_PREFIX = "bone_actor"   # e.g., 'bone_actor_20'
UNIQUE_RGB = (255, 0, 255)     # color to paint target for mask extraction
NUM_IMAGES = 20
IMG_W, IMG_H = 1280, 960
FOV_DEG = 60

# Camera randomization (keeps target in view)
DIST_RANGE = (150, 600)
YAW_RANGE = (-180, 180)
PITCH_RANGE = (-25, 25)
ROLL_RANGE = (-10, 10)

# Target jitter & rotation
TARGET_BASE_LOC = (0, 0, 100)
TARGET_JITTER = (-5, 5)
RANDOMIZE_TARGET_ROT = True
TROT_PITCH = (-10, 10)
TROT_YAW = (0, 360)
TROT_ROLL = (-10, 10)

# What to save
SAVE_LIT = True    # saves lit render to data/synth/rgb/{id}.png
SAVE_MASK = True   # saves binary mask to data/synth/seg/{id}_bin.png
SAVE_COMP = True   # composite (lit over random background) to data/synth/rgb/{id}_comp.png
SAVE_YOLO = True   # YOLO txt to data/synth/labels/{id}.txt
YOLO_CLASS_ID = 0

# ======== PROJECT PATHS (fit course skeleton) ========
PROJ_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
DATA_ROOT = os.path.join(PROJ_ROOT, "data")
SYNTH_ROOT = os.path.join(DATA_ROOT, "synth")
RGB_DIR = os.path.join(SYNTH_ROOT, "rgb")
SEG_DIR = os.path.join(SYNTH_ROOT, "seg")
LABELS_DIR = os.path.join(SYNTH_ROOT, "labels")
PROBE_DIR = os.path.join(SYNTH_ROOT, "_probes")
BACKGROUND_DIR = os.path.join(DATA_ROOT, "backgrounds")  # optional folder for composites

def _ensure_dirs():
    for d in (RGB_DIR, SEG_DIR, LABELS_DIR, PROBE_DIR):
        os.makedirs(d, exist_ok=True)
    # backgrounds are optional

def _abs(*parts):
    return os.path.abspath(os.path.join(*parts))

def _list_backgrounds():
    if not os.path.isdir(BACKGROUND_DIR):
        return []
    exts = (".jpg", ".jpeg", ".png", ".bmp")
    return [f for f in os.listdir(BACKGROUND_DIR) if f.lower().endswith(exts)]

# ===== UnrealCV =====
try:
    import unrealcv
except ImportError:
    print("Missing 'unrealcv' package. Install it in the UE Python environment or your venv.",
          file=sys.stderr)
    sys.exit(1)

client = unrealcv.Client(('localhost', PORT))

# ===== Helpers =====
def vget_and_wait(cmd, out_path, timeout=3.0):
    # Issue a vget that writes to disk, then poll until the file exists.
    out_path = _abs(out_path)
    client.request('%s %s' % (cmd, out_path))
    t0 = time.time()
    while time.time() - t0 < timeout:
        if os.path.isfile(out_path) and os.path.getsize(out_path) > 0:
            return True
        time.sleep(0.05)
    return False

def resolve_target_name(prefix):
    objs = client.request('vget /objects') or ""
    names = objs.split()
    if not names:
        return None
    pat = re.compile(r'^%s(?:_\d+)?$' % re.escape(prefix), re.IGNORECASE)
    exact = [n for n in names if pat.match(n)]
    if exact:
        return exact[0]
    starts = [n for n in names if n.lower().startswith(prefix.lower())]
    return starts[0] if starts else None

# ---------- Camera movement: pick a camera that REALLY moves ----------
def _save_probe(cam_token, xyz, pyr, path):
    x, y, z = xyz
    pitch, yaw, roll = pyr
    client.request('vset /camera/%s/location %f %f %f' % (cam_token, x, y, z))
    client.request('vset /camera/%s/rotation %f %f %f' % (cam_token, pitch, yaw, roll))
    time.sleep(0.05)  # settle
    return vget_and_wait('vget /camera/%s/lit' % cam_token, path)

def _img_diff(a_path, b_path):
    a = cv2.imread(a_path); b = cv2.imread(b_path)
    if a is None or b is None:
        return 0.0
    if a.shape != b.shape:
        h = min(a.shape[0], b.shape[0]); w = min(a.shape[1], b.shape[1])
        a = a[:h, :w]; b = b[:h, :w]
    return float(cv2.absdiff(a, b).mean())

def pick_movable_camera():
    client.request('vset /cameras/spawn')
    time.sleep(0.2)
    tokens = (client.request('vget /cameras') or "").split()
    nums = [t for t in tokens if t.isdigit()]
    candidates = nums + [t for t in tokens if t not in nums] + [str(i) for i in range(8)]
    seen = set()
    candidates = [t for t in candidates if not (t in seen or seen.add(t))]
    A_loc = (0.0, 0.0, 300.0);     A_rot = (-15.0, 0.0, 0.0)
    B_loc = (500.0, 200.0, 150.0); B_rot = (-10.0, 140.0, 0.0)
    for cam in candidates:
        pA = _abs(PROBE_DIR, "_probeA_%s.png" % cam)
        pB = _abs(PROBE_DIR, "_probeB_%s.png" % cam)
        okA = _save_probe(cam, A_loc, A_rot, pA)
        okB = _save_probe(cam, B_loc, B_rot, pB)
        if not (okA and okB):
            for p in (pA, pB):
                try: os.remove(p)
                except OSError: pass
            continue
        d = _img_diff(pA, pB)
        for p in (pA, pB):
            try: os.remove(p)
            except OSError: pass
        if d > 1.0:  # two poses rendered visibly different frames -> camera moves
            print("[INFO] Movable camera selected:", cam, "(diff=%.2f)" % d)
            return cam
    return None

# ---------- Mask / Composite / YOLO ----------
def isolate_color_mask(mask_rgb_path, rgb):
    img_bgr = cv2.imread(mask_rgb_path)
    if img_bgr is None:
        return None
    img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    r, g, b = rgb
    lower = np.array([r, g, b], dtype=np.uint8)
    upper = np.array([r, g, b], dtype=np.uint8)
    return cv2.inRange(img, lower, upper)

def composite_with_bg(bg_files, lit_path, mask_bin_path, out_path):
    imgI = Image.open(lit_path).convert("RGB")
    # pick a random background; drop broken files so we cannot loop forever
    bg = None
    candidates = list(bg_files)
    while candidates:
        choice = random.choice(candidates)
        try:
            bg = Image.open(_abs(BACKGROUND_DIR, choice)).convert("RGB")
            break
        except Exception:
            candidates.remove(choice)
    if bg is None:  # no (usable) backgrounds -> just copy the lit render
        imgI.save(out_path)
        return
    bg = bg.resize(imgI.size)
    m = Image.open(mask_bin_path).convert("L")
    Image.composite(imgI, bg, m).save(out_path)

def yolo_from_mask(mask_bin, img_w, img_h, cls_id=0):
    if mask_bin is None:
        return None
    x, y, w, h = cv2.boundingRect(mask_bin)
    if w == 0 or h == 0:
        return None
    cx = (x + w / 2.0) / float(img_w)
    cy = (y + h / 2.0) / float(img_h)
    nw = w / float(img_w)
    nh = h / float(img_h)
    return np.array([[cls_id, cx, cy, nw, nh]], dtype=np.float32)

# ===== Main =====
def main():
    _ensure_dirs()
    backgrounds = _list_backgrounds()

    # Connect
    client.connect()
    if not client.isconnected():
        print('UnrealCV server is not running. Start PIE or packaged game with UnrealCV plugin.')
        sys.exit(-1)
    print(client.request('vget /unrealcv/status'))

    # Pick a camera that actually moves
    cam = pick_movable_camera()
    if cam is None:
        print("[ERROR] Could not find a movable camera. In UE PIE Output Log, run: vset /cameras/spawn")
        client.disconnect(); sys.exit(1)

    # Camera setup
    client.request('vset /camera/%s/size %d %d' % (cam, IMG_W, IMG_H))
    client.request('vset /camera/%s/fov %d' % (cam, FOV_DEG))

    # Resolve target actor name from prefix
    target = resolve_target_name(TARGET_PREFIX)
    if not target:
        print("[ERROR] Could not resolve target with prefix '%s'." % TARGET_PREFIX)
        print("Sample objects:", (client.request('vget /objects') or "")[:500])
        client.disconnect(); sys.exit(1)
    print("[INFO] Target resolved:", target)

    # Paint target for mask isolation
    r, g, b = UNIQUE_RGB
    client.request('vset /object/%s/color %d %d %d' % (target, r, g, b))
    time.sleep(0.2)

    for i in range(NUM_IMAGES):
        print("IMAGE:", i)
        # Randomize target
        tx = TARGET_BASE_LOC[0] + random.uniform(*TARGET_JITTER)
        ty = TARGET_BASE_LOC[1] + random.uniform(*TARGET_JITTER)
        tz = TARGET_BASE_LOC[2] + random.uniform(*TARGET_JITTER)
        client.request('vset /object/%s/location %f %f %f' % (target, tx, ty, tz))
        if RANDOMIZE_TARGET_ROT:
            pt = random.uniform(*TROT_PITCH)
            yt = random.uniform(*TROT_YAW)
            rt = random.uniform(*TROT_ROLL)
            client.request('vset /object/%s/rotation %f %f %f' % (target, pt, yt, rt))

        # Sample camera pose on a sphere around the target, then look at it
        dist = random.uniform(*DIST_RANGE)
        yaw = random.uniform(*YAW_RANGE)
        pitch = random.uniform(*PITCH_RANGE)
        roll = random.uniform(*ROLL_RANGE)
        cx = tx + dist * math.cos(math.radians(pitch)) * math.cos(math.radians(yaw))
        cy = ty + dist * math.cos(math.radians(pitch)) * math.sin(math.radians(yaw))
        cz = tz + dist * math.sin(math.radians(pitch))
        dx, dy, dz = (tx - cx, ty - cy, tz - cz)
        cam_yaw = math.degrees(math.atan2(dy, dx))
        hyp = math.sqrt(dx * dx + dy * dy)
        cam_pitch = math.degrees(math.atan2(dz, hyp))
        client.request('vset /camera/%s/location %f %f %f' % (cam, cx, cy, cz))
        client.request('vset /camera/%s/rotation %f %f %f' % (cam, cam_pitch, cam_yaw, roll))

        # Paths inside data/synth
        stem = f"{i:06d}"
        img_lit = _abs(RGB_DIR, f"{stem}.png")            # lit
        img_mask_rgb = _abs(SEG_DIR, f"{stem}_mask.png")  # object_mask (RGB)
        img_mask_bin = _abs(SEG_DIR, f"{stem}_bin.png")   # binary
        img_comp = _abs(RGB_DIR, f"{stem}_comp.png")      # composite
        yolo_txt = _abs(LABELS_DIR, f"{stem}.txt")        # YOLO

        # Capture
        if SAVE_LIT and not vget_and_wait('vget /camera/%s/lit' % cam, img_lit):
            print("[WARN] lit not saved - skipping frame"); continue
        if (SAVE_MASK or SAVE_COMP or SAVE_YOLO) and not vget_and_wait('vget /camera/%s/object_mask' % cam, img_mask_rgb):
            print("[WARN] object_mask not saved - skipping frame"); continue

        # Mask isolate
        mask = isolate_color_mask(img_mask_rgb, UNIQUE_RGB)
        if mask is None:
            print("[WARN] failed to read mask image - skipping frame"); continue
        mask_bin = np.where(mask > 0, 255, 0).astype(np.uint8)
        if SAVE_MASK:
            cv2.imwrite(img_mask_bin, mask_bin)

        # YOLO
        if SAVE_YOLO:
            yolo = yolo_from_mask(mask_bin, IMG_W, IMG_H, cls_id=YOLO_CLASS_ID)
            if yolo is not None:
                np.savetxt(yolo_txt, yolo, fmt="%.0f %.6f %.6f %.6f %.6f")
            else:
                open(yolo_txt, 'w').close()

        # Composite
        if SAVE_COMP and SAVE_LIT:
            composite_with_bg(backgrounds, img_lit, img_mask_bin, img_comp)

    client.disconnect()
    print("Done. Outputs in:", SYNTH_ROOT)

if __name__ == "__main__":
    main()
```
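Before mixing synthetic frames into training, it’s worth eyeballing a few labels. A minimal sketch (not part of the lab scripts) that re-draws a YOLO box onto its frame; the example file names follow the capture script’s output pattern:

```python
# Sketch: sanity-check a synthetic sample by drawing its YOLO box on the frame.
import cv2

def draw_yolo(img_path, lbl_path, out_path="check.png"):
    img = cv2.imread(img_path)
    h, w = img.shape[:2]
    for line in open(lbl_path):
        if not line.strip():
            continue
        c, cx, cy, nw, nh = map(float, line.split())
        # convert normalized center/size back to pixel corners
        x1 = int((cx - nw / 2) * w); y1 = int((cy - nh / 2) * h)
        x2 = int((cx + nw / 2) * w); y2 = int((cy + nh / 2) * h)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.imwrite(out_path, img)

draw_yolo("data/synth/rgb/000000_comp.png", "data/synth/labels/000000.txt")
```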
Result: per-frame outputs under `data/synth/` — lit renders and composites in `rgb/`, binary masks in `seg/`, and YOLO labels in `labels/`. Tag each sample’s origin (`source: real/synth`) in your notes so real and synthetic data stay distinguishable.
Submit via BRUTE a folder `data/hw_submission/` containing your 50 labeled samples (images plus matching YOLO label files).