# print("Cleanning up workspace ...")
# # !rm -rf *
# print("Installing graphviz ...")
# # !sudo apt-get install graphviz 1>/dev/null
# print("Downloading MCUNet codebase ...")
# !wget https://www.dropbox.com/s/3y2n2u3mfxczwcb/mcunetv2-dev-main.zip?dl=0 >/dev/null
# !unzip mcunetv2-dev-main.zip* 1>/dev/null
# !mv mcunetv2-dev-main/* . 1>/dev/null
# print("Downloading VWW dataset ...")
# !wget https://www.dropbox.com/s/169okcuuv64d4nn/data.zip?dl=0 >/dev/null
# print("Unzipping VWW dataset ...")
# !unzip data.zip* 1>/dev/null
# print("Installing thop and onnx ...")
# !pip install thop 1>/dev/null
# !pip install onnx 1>/dev/null
👩‍💻 Lab 3
This session is a hands-on lab on Neural Architecture Search (NAS). The code walks through how to make a network deeper or grow its channels by changing parameters, with friendly code examples, so it is a great exercise for studying the experimental results in detail. The English explanations are material from the NAS lecture, so you do not have to read every word. There are also diagrams and notes scattered through the Getting Started part to help with understanding, so refer to them as you go.
Then let's get started!
Introduction
The notebook first mentions several lines of research and the models they produced. The model we will practice with today is Once-for-All (OFA) MCUNet, so keep that in mind.
With OFA MCUNet, as the figure below shows, we take an already-trained super network and evaluate "subsets" of it, i.e., sub-networks obtained by adjusting parameters such as shrinking channel counts or changing the number of layers, measuring their peak memory and compute cost (MACs). Then we look for a model with the MACs and peak memory we want.
How do we find a model that fits the constraints? By building an accuracy predictor. We collect (architecture, accuracy) data from OFA MCUNet, then train a model on that data: feed it the architecture parameters and it outputs a predicted accuracy. Finally, we use the accuracy predictor on randomly drawn architecture samples to find a model that satisfies the desired constraints. The purpose of introducing NAS in the lecture is this: "to deploy a large model on a small device, find a sub-network that matches the target spec and put that in instead." As examples of such small devices, the figure below shows MCUs, Alexa, and Google Home.
But the tight memory budget (50,000x smaller than GPUs) makes deep learning deployment difficult.
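Since the whole pipeline boils down to "sample, filter by predicted efficiency, rank by predicted accuracy", here is that loop as a minimal sketch (hypothetical helper names; the lab builds the real versions, RandomSearcher and EvolutionSearcher, further below):

# Minimal predictor-based search loop. `sample_arch` draws a random
# sub-network config; the two predictors are cheap stand-ins for actually
# training or benchmarking each candidate.
def predictor_based_search(sample_arch, predict_macs, predict_accuracy,
                           macs_budget, n_samples=300):
    feasible = []
    while len(feasible) < n_samples:
        arch = sample_arch()                    # draw a random sub-network
        if predict_macs(arch) <= macs_budget:   # keep only feasible candidates
            feasible.append(arch)
    # no training involved: rank feasible candidates by predicted accuracy
    return max(feasible, key=predict_accuracy)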
There are 2 main sections: accuracy & efficiency predictors and architecture search.
- For predictors, there are 4 questions in total. There is one question (5 pts) in the Getting Started section and the other three questions (30 pts) are in the Predictors section.
- For architecture search, there are 6 questions in total.
Now, without further ado, let's run things one by one! Install the packages as shown below.
First, install the required packages and download the Visual Wake Words dataset that will be used in this lab.
import argparse
import json
from PIL import Image
from tqdm import tqdm
import copy
import math
import numpy as np
import os
import random
import torch
from torch import nn
from torchvision import datasets, transforms
from mcunet.tinynas.search.accuracy_predictor import (
AccuracyDataset,
MCUNetArchEncoder,
)
from mcunet.tinynas.elastic_nn.networks.ofa_mcunets import OFAMCUNets
from mcunet.utils.mcunet_eval_helper import calib_bn, validate
from mcunet.utils.arch_visualization_helper import draw_arch
%matplotlib inline
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings("ignore")
Getting Started: Super Network and the VWW dataset
In this lab we load an already-trained MCUNetV2 super network. The code below covers loading the dataset, instantiating the OFA MCUNet class, extracting sub-networks to visualize their architectures, and finding models that match the constraints we care about (memory and compute).
- MCUNetV2 is a family of efficient neural networks tailored for resource-constrained microcontrollers. It utilizes patch-based inference, receptive field redistribution, and system-NN co-design, greatly improving the accuracy-efficiency tradeoff of MCUNet.
def build_val_data_loader(data_dir, resolution, batch_size=128, split=0):
    # split = 0: real val set, split = 1: holdout validation set
    assert split in [0, 1]
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    kwargs = {"num_workers": min(8, os.cpu_count()), "pin_memory": False}

    val_transform = transforms.Compose(
        [
            transforms.Resize(
                (resolution, resolution)  # if center crop, the person might be excluded
            ),
            transforms.ToTensor(),
            normalize,
        ]
    )
    val_dataset = datasets.ImageFolder(data_dir, transform=val_transform)
    val_dataset = torch.utils.data.Subset(
        val_dataset, list(range(len(val_dataset)))[split::2]
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False, **kwargs
    )
    return val_loader
= "data/vww-s256/val"
data_dir
= build_val_data_loader(data_dir, resolution=128, batch_size=1)
val_data_loader
vis_x, vis_y = 2, 3
fig, axs = plt.subplots(vis_x, vis_y)

num_images = 0
for data, label in val_data_loader:
    img = np.array((((data + 1) / 2) * 255).numpy(), dtype=np.uint8)
    img = img[0].transpose(1, 2, 0)
    if label.item() == 0:
        label_text = "No person"
    else:
        label_text = "Person"
    axs[num_images // vis_y][num_images % vis_y].imshow(img)
    axs[num_images // vis_y][num_images % vis_y].set_title(f"Label: {label_text}")
    axs[num_images // vis_y][num_images % vis_y].set_xticks([])
    axs[num_images // vis_y][num_images % vis_y].set_yticks([])
    num_images += 1
    if num_images > vis_x * vis_y - 1:
        break
plt.show()
Here we are told that the design space of OFA MCUNet contains \(>10^{19}\) architectures. Enormous, right? Each subnet is composed of inverted MobileNet blocks, and the parameters that change the architecture are the kernel sizes (3, 5, 7), expand ratios (3, 4, 6), depth, and global channel scaling (0.5x, 0.75x, 1.0x, specified by width_mult_list). More detailed explanations follow below.
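As a rough sanity check of that \(>10^{19}\) figure (a loose back-of-envelope count, not the exact size): the supernet has 20 searchable layers (count the entries of the 'ks' and 'e' lists printed in the search results later), each with 3 kernel sizes times 3 expand ratios, and depth, width, and resolution choices on top.

per_layer_choices = 3 * 3   # 3 kernel sizes x 3 expand ratios
num_layers = 20
print(f"~{per_layer_choices ** num_layers:.2e}")  # ~1.22e+19, already > 10^19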
= "cuda:0"
device = OFAMCUNets(
ofa_network =2,
n_classes=(0.1, 1e-3),
bn_param=0.0,
dropout_rate="mcunet384",
base_stage_width=[0.5, 0.75, 1.0],
width_mult_list=[3, 5, 7],
ks_list=[3, 4, 6],
expand_ratio_list=[0, 1, 2],
depth_list=[1, 2, 2, 2, 2],
base_depth=True,
fuse_blk1=[False, [False, True, True, True], True, True, True, False],
se_stages
)
ofa_network.load_state_dict("vww_supernet.pth", map_location="cpu")["state_dict"], strict=True
torch.load(
)
= ofa_network.to(device) ofa_network
from mcunet.utils.pytorch_utils import count_peak_activation_size, count_net_flops, count_parameters

def evaluate_sub_network(ofa_network, cfg, image_size=None):
    if "image_size" in cfg:
        image_size = cfg["image_size"]
    batch_size = 128
    # step 1. sample the active subnet with the given config.
    ofa_network.set_active_subnet(**cfg)
    # step 2. extract the subnet with corresponding weights.
    subnet = ofa_network.get_active_subnet().to(device)
    # step 3. calculate the efficiency stats of the subnet.
    peak_memory = count_peak_activation_size(subnet, (1, 3, image_size, image_size))
    macs = count_net_flops(subnet, (1, 3, image_size, image_size))
    params = count_parameters(subnet)
    # step 4. perform BN parameter re-calibration.
    calib_bn(subnet, data_dir, batch_size, image_size)
    # step 5. define the validation dataloader.
    val_loader = build_val_data_loader(data_dir, image_size, batch_size)
    # step 6. validate the accuracy.
    acc = validate(subnet, val_loader)
    return acc, peak_memory, macs, params
We also provide a handy helper function to visualize the architecture of the subnets. The function takes in the configuration of the subnet and returns an image representing the architecture.
def visualize_subnet(cfg):
    draw_arch(cfg["ks"], cfg["e"], cfg["d"], cfg["image_size"], out_name="viz/subnet")
    im = Image.open("viz/subnet.png")
    im = im.rotate(90, expand=1)
    fig = plt.figure(figsize=(im.size[0] / 250, im.size[1] / 250))
    plt.axis("off")
    plt.imshow(im)
    plt.show()
When you visualize a model structure with the code above, names like MBConv3-3x3 will appear. The pattern is MBConv{e}-{k}x{k}, where e is the expand ratio and k is the kernel size of the depthwise convolution layer.
More Explanation to understand OFA-MCUNet
Before diving into the questions, let me explain OFA-MCUNet a little. As you scroll down, the model structure and its parameters keep showing up, and knowing what each one means makes everything easier to follow.
The model consists of first_conv, blocks, feature_mix_layer, and classifier. Within blocks, the six blocks between the first and last ones are grown or shrunk using four parameters: kernel size, expand ratio, depth, and width multiplier. Let's look at each parameter in turn!
1. Kernel size
The kernel size is exactly the kernel you know from convolutions. In this design space it can be 3x3, 5x5, or 7x7.
2. Width multiplier, Depth
Drawn as blocks, OFA MCUNet looks like the diagram below. Look at the block highlighted in green: it has an input channel count and an output channel count. The width multiplier is precisely how much those two are shrunk or kept.
Second, each block is built out of MBConv (MobileNet convolution) layers, so the key question is how many MBConv layers go into a block; depth decides this. In the parameters it is split into base_depth and depth_list: for each block, the value drawn from depth_list is added on top of base_depth to give the number of MBConv layers.
Last comes the expand ratio. This parameter lives inside an MBConv; again, see the figure. An MBConv consists of a MobileNet (pointwise) convolution, a separable convolution, an SE block, and another MobileNet convolution. The expand ratio is the ratio between the input channel count and the channel count coming out of the first MobileNet convolution.
# OFAMCUNets
# consists of: first_conv, blocks, feature_mix_layer, classifier
# total 9 blocks (first_conv, first block, blocks, last block)
# 1. first_conv = 1x1 channel inc conv (3 -> X)
# 2. first block = MB InvertedConvLayer
# 3. blocks
#    - depth = num blocks
#    - 1 block = MobileInvertedResidualBlock = MBConvLayer + Residual
#############################################################
#    Dynamic MBConvLayer = 2 times channel expansion        #
#    fuse_blk1                 se_stage                     #
#    MBConvLayer + SeparableConv + SEBlock + MBConvLayer    #
#############################################################
# SEBlock: conv 1x1 (reduce) -> act -> conv 1x1 (expand) -> h_sigmoid
#          -> SENet (Squeeze-and-Excitation Network)
# 4. last block = Mobile Inverted Residual Block
# 5. feature_mix_layer = 1x1 channel dec conv
# 6. classifier = linear layer

# Parameters (sample_active_subnet)
# kernel size, expand ratio, depth, width multiply
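To make the expand ratio concrete, here is a minimal, self-contained inverted-bottleneck sketch (a toy version of the blocks above: it omits the SE block and the dynamic-channel machinery that the real OFAMCUNets blocks have):

import torch
from torch import nn

class ToyMBConv(nn.Module):
    # Inverted bottleneck: 1x1 expand -> kxk depthwise -> 1x1 project.
    def __init__(self, in_ch, out_ch, kernel_size=3, expand_ratio=4):
        super().__init__()
        mid_ch = in_ch * expand_ratio  # the channel blow-up controlled by 'e'
        self.expand = nn.Sequential(
            nn.Conv2d(in_ch, mid_ch, 1, bias=False),
            nn.BatchNorm2d(mid_ch),
            nn.ReLU6(inplace=True),
        )
        self.depthwise = nn.Sequential(
            nn.Conv2d(mid_ch, mid_ch, kernel_size, padding=kernel_size // 2,
                      groups=mid_ch, bias=False),  # the kxk controlled by 'ks'
            nn.BatchNorm2d(mid_ch),
            nn.ReLU6(inplace=True),
        )
        self.project = nn.Sequential(
            nn.Conv2d(mid_ch, out_ch, 1, bias=False),
            nn.BatchNorm2d(out_ch),
        )
        self.use_residual = in_ch == out_ch

    def forward(self, x):
        y = self.project(self.depthwise(self.expand(x)))
        return x + y if self.use_residual else y

# e.g. an MBConv4-5x5 on a 16-channel feature map:
block = ToyMBConv(16, 16, kernel_size=5, expand_ratio=4)
print(block(torch.randn(1, 16, 32, 32)).shape)  # torch.Size([1, 16, 32, 32])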
In the code there is a method called make_divisible, which rounds channel counts so they stay divisible by 8 whenever channels are grown or shrunk. It is taken from the TensorFlow repo; the source does not say why, but a common rationale is that channel counts aligned to multiples of 8 map better onto vectorized hardware and memory layouts.
def make_divisible(v, divisor, min_val=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_val:
    :return:
    """
    if min_val is None:
        min_val = divisor
    new_v = max(min_val, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
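A quick demo of what make_divisible actually does (values picked for illustration): it rounds to the nearest multiple of 8, except that it refuses to round down by more than 10%, in which case it bumps up to the next multiple.

for v in [9, 12, 31, 36, 75]:
    print(f"{v} -> {make_divisible(v, 8)}")
# 9 -> 16  (rounding down to 8 would shrink by more than 10%, so it rounds up)
# 12 -> 16
# 31 -> 32
# 36 -> 40
# 75 -> 72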
TL;DR Summary
The lab breaks down into four stages. Throughout, the parameters are kernel size, expand ratio, depth, and width multiplier.
1. OFA-MCUNet
First, using the trained vww_supernet, we measure the accuracy of various parameter combinations. For each result we also compute MACs and peak memory, so that later we can find architectures that fall within the given constraints.
2. Accuracy Predictor
Using the per-combination accuracies from above, we now go the other way and train a model that predicts accuracy from this data. The model is simple, just three stacked linear layers, but an encoder is added to turn a parameter combination into an embedding vector.
3. Encoding: MCUNetArchEncoder
Along the way, each parameter combination is encoded into an embedding vector. For example, with kernel sizes 3x3, 5x5, and 7x7, they are encoded as (1, 0, 0), (0, 1, 0), and (0, 0, 1) respectively (a toy sketch of this encoding follows right after this list). The accuracy predictor is trained on these encodings, and we will also see that its predictions correlate linearly with the labels.
4. Random Search and Evolutionary Search
The final stage finds architecture parameters that meet the constraints, i.e., peak memory and MACs. It uses random mutation and crossover; the details are easiest to absorb by reading the code! For the record, at the very end in Question 10 I could not find an architecture satisfying "The activation size of the subnet is at most 64 KB". If you manage to find one, or figure out why none exists, please share!
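Here is the toy sketch of the one-hot encoding promised in step 3 (an illustration of the convention, not the MCUNetArchEncoder implementation itself):

# One design choice becomes one one-hot segment of the architecture embedding.
def one_hot(value, choices):
    vec = [0] * len(choices)
    vec[choices.index(value)] = 1
    return vec

kernel_choices, expand_choices = [3, 5, 7], [3, 4, 6]
# One MBConv layer with kernel size 5 and expand ratio 6:
layer_embedding = one_hot(5, kernel_choices) + one_hot(6, expand_choices)
print(layer_embedding)  # [0, 1, 0, 0, 0, 1]
# A skipped layer contributes all zeros instead: [0, 0, 0, 0, 0, 0]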
OFA_network's forward
Before running the model, here is a snippet taken from the dynamic convolution code showing how channels are actually cut down. The out_channel and in_channel decided by the parameters are used to slice the weight tensor as in the code below; the result is called the active subnet. For my experiments I grew the image size through 48, 96, 128, 256, 384, and 512, and sampled sub-networks with random, max, and min.
filters = self.conv.weight[:out_channel, :in_channel, :, :].contiguous()
padding = get_same_padding(self.kernel_size)
y = F.conv2d(x, filters, None, self.stride, padding, self.dilation, 1)
What was interesting: accuracy keeps climbing as the image gets larger, peaks around 256, and then falls at 384 and 512. See the experimental results below.
# sample_active_subnet
# kernel size, expand ratio, depth, width mult
image_size = 48

cfg = ofa_network.sample_active_subnet(sample_function=random.choice, image_size=image_size)
acc, _, _, params = evaluate_sub_network(ofa_network, cfg)
visualize_subnet(cfg)
print(f"The accuracy of the sampled subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")

largest_cfg = ofa_network.sample_active_subnet(sample_function=max, image_size=image_size)
acc, _, _, params = evaluate_sub_network(ofa_network, largest_cfg)
visualize_subnet(largest_cfg)
print(f"The largest subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")

smallest_cfg = ofa_network.sample_active_subnet(sample_function=min, image_size=image_size)
acc, peak_memory, macs, params = evaluate_sub_network(ofa_network, smallest_cfg)
visualize_subnet(smallest_cfg)
print(f"The smallest subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")
Validate: 100%|██████████| 32/32 [00:00<00:00, 51.09it/s, loss=0.603, top1=65.9]
Validate: 100%|██████████| 32/32 [00:00<00:00, 53.97it/s, loss=0.625, top1=64.2]
Validate: 100%|██████████| 32/32 [00:00<00:00, 51.76it/s, loss=0.718, top1=59.3]
The accuracy of the sampled subnet: #params= 1.6M, accuracy= 65.9%.
The largest subnet: #params= 2.5M, accuracy= 64.2%.
The smallest subnet: #params= 0.3M, accuracy= 59.3%.
image_size = 96

cfg = ofa_network.sample_active_subnet(sample_function=random.choice, image_size=image_size)
acc, _, _, params = evaluate_sub_network(ofa_network, cfg)
visualize_subnet(cfg)
print(f"The accuracy of the sampled subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")

largest_cfg = ofa_network.sample_active_subnet(sample_function=max, image_size=image_size)
acc, _, _, params = evaluate_sub_network(ofa_network, largest_cfg)
visualize_subnet(largest_cfg)
print(f"The largest subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")

smallest_cfg = ofa_network.sample_active_subnet(sample_function=min, image_size=image_size)
acc, peak_memory, macs, params = evaluate_sub_network(ofa_network, smallest_cfg)
visualize_subnet(smallest_cfg)
print(f"The smallest subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")
Validate: 100%|██████████| 32/32 [00:00<00:00, 35.68it/s, loss=0.321, top1=86.4]
Validate: 100%|██████████| 32/32 [00:00<00:00, 42.76it/s, loss=0.29, top1=88.6]
Validate: 100%|██████████| 32/32 [00:00<00:00, 44.92it/s, loss=0.379, top1=83.4]
The accuracy of the sampled subnet: #params= 0.6M, accuracy= 86.4%.
The largest subnet: #params= 2.5M, accuracy= 88.6%.
The smallest subnet: #params= 0.3M, accuracy= 83.4%.
image_size = 128

# sample_active_subnet
# kernel size, expand ratio, depth, width mult
cfg = ofa_network.sample_active_subnet(sample_function=random.choice, image_size=image_size)
acc, _, _, params = evaluate_sub_network(ofa_network, cfg)
visualize_subnet(cfg)
print(f"The accuracy of the sampled subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")

largest_cfg = ofa_network.sample_active_subnet(sample_function=max, image_size=image_size)
acc, _, _, params = evaluate_sub_network(ofa_network, largest_cfg)
visualize_subnet(largest_cfg)
print(f"The largest subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")

smallest_cfg = ofa_network.sample_active_subnet(sample_function=min, image_size=image_size)
acc, peak_memory, macs, params = evaluate_sub_network(ofa_network, smallest_cfg)
visualize_subnet(smallest_cfg)
print(f"The smallest subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")
Validate: 100%|██████████| 32/32 [00:00<00:00, 39.53it/s, loss=0.228, top1=91.3]
Validate: 100%|██████████| 32/32 [00:01<00:00, 30.92it/s, loss=0.21, top1=92.3]
Validate: 100%|██████████| 32/32 [00:00<00:00, 40.69it/s, loss=0.307, top1=87.3]
The accuracy of the sampled subnet: #params= 1.3M, accuracy= 91.3%.
The largest subnet: #params= 2.5M, accuracy= 92.3%.
The smallest subnet: #params= 0.3M, accuracy= 87.3%.
image_size = 256

# sample_active_subnet
# kernel size, expand ratio, depth, width mult
cfg = ofa_network.sample_active_subnet(sample_function=random.choice, image_size=image_size)
acc, _, _, params = evaluate_sub_network(ofa_network, cfg)
visualize_subnet(cfg)
print(f"The accuracy of the sampled subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")

largest_cfg = ofa_network.sample_active_subnet(sample_function=max, image_size=image_size)
acc, _, _, params = evaluate_sub_network(ofa_network, largest_cfg)
visualize_subnet(largest_cfg)
print(f"The largest subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")

smallest_cfg = ofa_network.sample_active_subnet(sample_function=min, image_size=image_size)
acc, peak_memory, macs, params = evaluate_sub_network(ofa_network, smallest_cfg)
visualize_subnet(smallest_cfg)
print(f"The smallest subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")
Validate: 100%|██████████| 32/32 [00:01<00:00, 19.93it/s, loss=0.187, top1=93.5]
Validate: 100%|██████████| 32/32 [00:03<00:00, 10.12it/s, loss=0.177, top1=93.9]
Validate: 100%|██████████| 32/32 [00:01<00:00, 25.67it/s, loss=0.258, top1=90.2]
The accuracy of the sampled subnet: #params= 0.6M, accuracy= 93.5%.
The largest subnet: #params= 2.5M, accuracy= 93.9%.
The smallest subnet: #params= 0.3M, accuracy= 90.2%.
image_size = 256 + 128

# sample_active_subnet
# kernel size, expand ratio, depth, width mult
cfg = ofa_network.sample_active_subnet(sample_function=random.choice, image_size=image_size)
acc, _, _, params = evaluate_sub_network(ofa_network, cfg)
visualize_subnet(cfg)
print(f"The accuracy of the sampled subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")

largest_cfg = ofa_network.sample_active_subnet(sample_function=max, image_size=image_size)
acc, _, _, params = evaluate_sub_network(ofa_network, largest_cfg)
visualize_subnet(largest_cfg)
print(f"The largest subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")

smallest_cfg = ofa_network.sample_active_subnet(sample_function=min, image_size=image_size)
acc, peak_memory, macs, params = evaluate_sub_network(ofa_network, smallest_cfg)
visualize_subnet(smallest_cfg)
print(f"The smallest subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")
Validate: 100%|██████████| 32/32 [00:03<00:00, 8.16it/s, loss=0.241, top1=91.1]
Validate: 100%|██████████| 32/32 [00:06<00:00, 4.60it/s, loss=0.263, top1=90.5]
Validate: 100%|██████████| 32/32 [00:02<00:00, 12.13it/s, loss=0.34, top1=85.4]
The accuracy of the sampled subnet: #params= 1.1M, accuracy= 91.1%.
The largest subnet: #params= 2.5M, accuracy= 90.5%.
The smallest subnet: #params= 0.3M, accuracy= 85.4%.
image_size = 512

# sample_active_subnet
# kernel size, expand ratio, depth, width mult
cfg = ofa_network.sample_active_subnet(sample_function=random.choice, image_size=image_size)
acc, _, _, params = evaluate_sub_network(ofa_network, cfg)
visualize_subnet(cfg)
print(f"The accuracy of the sampled subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")

largest_cfg = ofa_network.sample_active_subnet(sample_function=max, image_size=image_size)
acc, _, _, params = evaluate_sub_network(ofa_network, largest_cfg)
visualize_subnet(largest_cfg)
print(f"The largest subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")

smallest_cfg = ofa_network.sample_active_subnet(sample_function=min, image_size=image_size)
acc, peak_memory, macs, params = evaluate_sub_network(ofa_network, smallest_cfg)
visualize_subnet(smallest_cfg)
print(f"The smallest subnet: #params={params/1e6: .1f}M, accuracy={acc: .1f}%.")
Validate: 100%|██████████| 32/32 [00:06<00:00, 5.31it/s, loss=0.376, top1=83.1]
Validate: 100%|██████████| 32/32 [00:11<00:00, 2.67it/s, loss=0.413, top1=81]
Validate: 100%|██████████| 32/32 [00:04<00:00, 7.23it/s, loss=0.489, top1=76.1]
The accuracy of the sampled subnet: #params= 0.5M, accuracy= 83.1%.
The largest subnet: #params= 2.5M, accuracy= 81.0%.
The smallest subnet: #params= 0.3M, accuracy= 76.1%.
Question 1: Design space exploration.
Try manually sampling different subnets by running the cells above multiple times. You can also vary the input resolution. Talk about your findings.
Hint: which dimension plays the most important role for the accuracy?
Answer: Image resolution plays the most important role for classification accuracy.
To be honest, the question itself gave the hint, and we ran the experiments accordingly: the sweep above shows exactly how accuracy changes with image resolution.
Part 1. Predictors
Now for the second stage: we will build a model that predicts a subnet's accuracy on the VWW dataset from the data gathered with the model above. The predictor is simpler than you might expect, just three linear layers. The diagram below says, in essence, that we will ultimately obtain a model that meets the constraints.
The efficiency predictor's numbers come out together with accuracy once the architecture is fixed. We already implemented this logic in the earlier example; if it has slipped your mind, scroll back up!
Question 2: Implement the efficiency predictor.
First we build a class called AnalyticalEfficiencyPredictor. It computes MACs and peak memory for a given image size (get_efficiency), and it also provides a function that tells us whether both meet the target (satisfy_constraint). For counting FLOPs and memory, use count_net_flops and count_peak_activation_size, which the professor has kindly provided.
class AnalyticalEfficiencyPredictor:
    def __init__(self, net):
        self.net = net

    def get_efficiency(self, spec: dict):
        self.net.set_active_subnet(**spec)
        subnet = self.net.get_active_subnet()
        if torch.cuda.is_available():
            subnet = subnet.cuda()
        ############### YOUR CODE STARTS HERE ###############
        # Hint: take a look at the `evaluate_sub_network` function above.
        # Hint: the data shape is (batch_size, input_channel, image_size, image_size)
        data_shape = (1, 3, spec["image_size"], spec["image_size"])
        macs = count_net_flops(subnet, data_shape)
        peak_memory = count_peak_activation_size(subnet, data_shape)
        ################ YOUR CODE ENDS HERE ################
        return dict(millionMACs=macs / 1e6, KBPeakMemory=peak_memory / 1024)

    def satisfy_constraint(self, measured: dict, target: dict):
        for key in measured:
            # if the constraint is not specified, we just continue
            if key not in target:
                continue
            # if we exceed the constraint, just return false.
            if measured[key] > target[key]:
                return False
        # no constraint violated, return true.
        return True
Let's test your implementation of the analytical efficiency predictor by examining the values it returns for the smallest and largest subnets we evaluated earlier. The results from the efficiency predictor should match the previous results.
efficiency_predictor = AnalyticalEfficiencyPredictor(ofa_network)

image_size = 96
# Print out the efficiency of the smallest subnet.
smallest_cfg = ofa_network.sample_active_subnet(sample_function=min, image_size=image_size)
eff_smallest = efficiency_predictor.get_efficiency(smallest_cfg)

# Print out the efficiency of the largest subnet.
largest_cfg = ofa_network.sample_active_subnet(sample_function=max, image_size=image_size)
eff_largest = efficiency_predictor.get_efficiency(largest_cfg)

print("Efficiency stats of the smallest subnet:", eff_smallest)
print("Efficiency stats of the largest subnet:", eff_largest)
Efficiency stats of the smallest subnet: {'millionMACs': 8.302128, 'KBPeakMemory': 72.0}
Efficiency stats of the largest subnet: {'millionMACs': 79.416432, 'KBPeakMemory': 270.0}
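As a quick sanity check of satisfy_constraint with the stats just printed: the smallest subnet (72 KB peak memory) fits a 256 KB budget, while the largest (270 KB) does not.

print(efficiency_predictor.satisfy_constraint(eff_smallest, dict(KBPeakMemory=256)))  # True
print(efficiency_predictor.satisfy_constraint(eff_largest, dict(KBPeakMemory=256)))   # False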
Question 3: Implement the accuracy predictor.
Now it is time to build the accuracy predictor. Before that, note that each dataset entry is a combination of parameters; to use these as training data we need to embed them, and that is exactly what MCUNetArchEncoder does (again, kindly provided by the professor). For the accuracy predictor itself we will use an MLP (multi-layer perceptron).
The accuracy predictor takes in the architecture of a sub-network and predicts its accuracy on the VWW dataset. Since it is an MLP network, the sub-network must be encoded into a vector. In this lab, we provide a class MCUNetArchEncoder to perform such conversion from sub-network architecture to a binary vector.
image_size_list = [96, 112, 128, 144, 160]
arch_encoder = MCUNetArchEncoder(
    image_size_list=image_size_list,
    base_depth=ofa_network.base_depth,
    depth_list=ofa_network.depth_list,
    expand_list=ofa_network.expand_ratio_list,
    width_mult_list=ofa_network.width_mult_list,
)
We generated an accuracy dataset beforehand, which is a collection of [architecture, accuracy] pairs stored under the acc_datasets folder.
With the architecture encoder, you are now required to define the accuracy predictor, which is a multi-layer perceptron (MLP) network with 400 channels per intermediate layer. For simplicity, we fix the number of layers to 3. Please implement this MLP network in the following cell.
class AccuracyPredictor(nn.Module):
    def __init__(
        self,
        arch_encoder,
        hidden_size=400,
        n_layers=3,
        checkpoint_path=None,
        device="cuda:0",
    ):
        super(AccuracyPredictor, self).__init__()
        self.arch_encoder = arch_encoder
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.device = device

        layers = []
        ############### YOUR CODE STARTS HERE ###############
        # Let's build an MLP with n_layers layers.
        # Each layer (nn.Linear) has hidden_size channels and
        # uses nn.ReLU as the activation function.
        # Hint: You can assume that n_layers is fixed to be 3, for simplicity.
        # Hint: the input dimension of the first layer is not hidden_size.
        for i in range(self.n_layers):
            layers.append(
                nn.Sequential(
                    nn.Linear(
                        self.arch_encoder.n_dim if i == 0 else self.hidden_size,
                        self.hidden_size,
                    ),
                    nn.ReLU(inplace=True),
                )
            )
        ################ YOUR CODE ENDS HERE ################
        layers.append(nn.Linear(self.hidden_size, 1, bias=False))
        self.layers = nn.Sequential(*layers)
        self.base_acc = nn.Parameter(
            torch.zeros(1, device=self.device), requires_grad=False
        )

        if checkpoint_path is not None and os.path.exists(checkpoint_path):
            checkpoint = torch.load(checkpoint_path, map_location="cpu")
            if "state_dict" in checkpoint:
                checkpoint = checkpoint["state_dict"]
            self.load_state_dict(checkpoint)
            print("Loaded checkpoint from %s" % checkpoint_path)
        self.layers = self.layers.to(self.device)

    def forward(self, x):
        y = self.layers(x).squeeze()
        return y + self.base_acc

    def predict_acc(self, arch_dict_list):
        X = [self.arch_encoder.arch2feature(arch_dict) for arch_dict in arch_dict_list]
        X = torch.tensor(np.array(X)).float().to(self.device)
        return self.forward(X)
Let's print out the architecture of the AccuracyPredictor you just defined.
"pretrained", exist_ok=True)
os.makedirs(= (
acc_pred_checkpoint_path f"pretrained/{ofa_network.__class__.__name__}_acc_predictor.pth"
)= AccuracyPredictor(
acc_predictor
arch_encoder,=400,
hidden_size=3,
n_layers=None,
checkpoint_path=device,
device
)print(acc_predictor)
AccuracyPredictor(
(layers): Sequential(
(0): Sequential(
(0): Linear(in_features=128, out_features=400, bias=True)
(1): ReLU(inplace=True)
)
(1): Sequential(
(0): Linear(in_features=400, out_features=400, bias=True)
(1): ReLU(inplace=True)
)
(2): Sequential(
(0): Linear(in_features=400, out_features=400, bias=True)
(1): ReLU(inplace=True)
)
(3): Linear(in_features=400, out_features=1, bias=False)
)
)
In short: the dataset consists of 40,000 training pairs and 10,000 test pairs, and each accuracy is paired with its model architecture. One more thing not to forget: the parameters get converted to a one-hot representation! Looking at the output below, lines like "kernel size embedding: [0 1 0] => kernel size: 5; expand ratio embedding: [0 1 0] => expand ratio: 4" confirm how a model architecture is embedded.
Let's first visualize some samples of the accuracy dataset in the following cell.
The accuracy dataset is composed of 50,000 [architecture, accuracy] pairs, where 40,000 of them are used as the training set and the remaining 10,000 as the validation set.
For accuracy, we calculate the average accuracy of all [architecture, accuracy] pairs in the accuracy dataset and define it as base_acc. Instead of directly regressing the accuracy of each architecture, the accuracy predictor's training target is accuracy - base_acc. Since accuracy - base_acc is usually much smaller than accuracy itself, this makes training easier.
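A tiny worked example of the residual target (using the numbers printed by the next cell, where base_acc is about 0.903 and the shown subnet's accuracy is 0.887):

base_acc_example, label_example = 0.903, 0.887
# The MLP regresses this small, roughly zero-centered residual
# instead of the raw accuracy 0.887:
print(f"training target: {label_example - base_acc_example:+.3f}")  # -0.016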
For architecture, each subnet within the design space is uniquely represented by a binary vector. The binary vector is a concatenation of the one-hot representation for both global parameters (e.g. input resolution, width multiplier) and parameters of each inverted MobileNet block (e.g. kernel sizes and expand ratios). Note that we prefer one-hot representations over numerical representations because all design hyperparameters are discrete values.
For example, our design space supports
kernel_size = [3, 5, 7]
expand_ratio = [3, 4, 6]
Then, we represent kernel_size=3 as [1, 0, 0], kernel_size=5 as [0, 1, 0], and kernel_size=7 as [0, 0, 1]. Similarly, expand_ratio=3 is written as [1, 0, 0]; expand_ratio=4 is written as [0, 1, 0]; and expand_ratio=6 is written as [0, 0, 1]. The representation for each inverted MobileNet block is obtained by concatenating the kernel size embedding with the expand ratio embedding. Note that for skipped blocks, we use [0, 0, 0] to represent their kernel sizes and expand ratios. You will see a detailed explanation of the architecture-embedding correspondence after running the following cell.
acc_dataset = AccuracyDataset("acc_datasets")
train_loader, valid_loader, base_acc = acc_dataset.build_acc_data_loader(
    arch_encoder=arch_encoder
)
print(f"The basic accuracy (mean accuracy of all subnets within the dataset) is: {(base_acc * 100): .1f}%.")

# Let's print one sample in the training set
sampled = 0
for (data, label) in train_loader:
    data = data.to(device)
    label = label.to(device)
    print("=" * 100)
    # dummy pass to print the divided encoding
    arch_encoding = arch_encoder.feature2arch(data[0].int().cpu().numpy(), verbose=False)
    # print out the architecture encoding process in detail
    arch_encoding = arch_encoder.feature2arch(data[0].int().cpu().numpy(), verbose=True)
    visualize_subnet(arch_encoding)
    print(f"The accuracy of this subnet on the holdout validation set is: {(label[0] * 100): .1f}%.")
    sampled += 1
    if sampled == 1:
        break
Loading data: 100%|██████████| 50000/50000 [00:00<00:00, 228025.66it/s]
Train Size: 40000, Valid Size: 10000
The basic accuracy (mean accuracy of all subnets within the dataset) is: 90.3%.
====================================================================================================
network embedding: [1 0 0 0 0 | 0 1 0 | 0 1 0 | 0 1 0 | 1 0 0 | 0 0 1 | 1 0 0 | 1 0 0 | 0 0 1 | 1 0 0 | 0 1 0 | 0 1 0 | 0 0 1 | 0 0 1 | 0 0 0 | 0 0 0 | 0 1 0 | 0 0 1 | 0 1 0 | 0 0 1 | 0 1 0 | 0 1 0 | 0 1 0 | 0 0 1 | 1 0 0 | 1 0 0 | 0 1 0 | 0 1 0 | 0 0 1 | 0 0 1 | 0 1 0 | 0 0 1 | 0 0 1 | 1 0 0 | 0 1 0 | 0 0 1 | 0 0 0 | 0 0 0 | 0 0 0 | 0 0 0 | 0 1 0 | 0 0 1]
image resolution embedding: [1 0 0 0 0] => image resolution: 96
width multiplier embedding: [0 1 0] => width multiplier: 0.75
**************************************************Stage1**************************************************
kernel size embedding: [0 1 0] => kernel size: 5; expand ratio embedding: [0 1 0] => expand ratio: 4
kernel size embedding: [1 0 0] => kernel size: 3; expand ratio embedding: [0 0 1] => expand ratio: 6
kernel size embedding: [1 0 0] => kernel size: 3; expand ratio embedding: [1 0 0] => expand ratio: 3
**************************************************Stage2**************************************************
kernel size embedding: [0 0 1] => kernel size: 7; expand ratio embedding: [1 0 0] => expand ratio: 3
kernel size embedding: [0 1 0] => kernel size: 5; expand ratio embedding: [0 1 0] => expand ratio: 4
kernel size embedding: [0 0 1] => kernel size: 7; expand ratio embedding: [0 0 1] => expand ratio: 6
kernel size embedding: [0 0 0] expand ratio embedding: [0 0 0] => layer skipped.
**************************************************Stage3**************************************************
kernel size embedding: [0 1 0] => kernel size: 5; expand ratio embedding: [0 0 1] => expand ratio: 6
kernel size embedding: [0 1 0] => kernel size: 5; expand ratio embedding: [0 0 1] => expand ratio: 6
kernel size embedding: [0 1 0] => kernel size: 5; expand ratio embedding: [0 1 0] => expand ratio: 4
kernel size embedding: [0 1 0] => kernel size: 5; expand ratio embedding: [0 0 1] => expand ratio: 6
**************************************************Stage4**************************************************
kernel size embedding: [1 0 0] => kernel size: 3; expand ratio embedding: [1 0 0] => expand ratio: 3
kernel size embedding: [0 1 0] => kernel size: 5; expand ratio embedding: [0 1 0] => expand ratio: 4
kernel size embedding: [0 0 1] => kernel size: 7; expand ratio embedding: [0 0 1] => expand ratio: 6
kernel size embedding: [0 1 0] => kernel size: 5; expand ratio embedding: [0 0 1] => expand ratio: 6
**************************************************Stage5**************************************************
kernel size embedding: [0 0 1] => kernel size: 7; expand ratio embedding: [1 0 0] => expand ratio: 3
kernel size embedding: [0 1 0] => kernel size: 5; expand ratio embedding: [0 0 1] => expand ratio: 6
kernel size embedding: [0 0 0] expand ratio embedding: [0 0 0] => layer skipped.
kernel size embedding: [0 0 0] expand ratio embedding: [0 0 0] => layer skipped.
**************************************************Stage6**************************************************
kernel size embedding: [0 1 0] => kernel size: 5; expand ratio embedding: [0 0 1] => expand ratio: 6
The accuracy of this subnet on the holdout validation set is: 88.7%.
Question 4: Complete the code for accuracy predictor training.
Time to train!
criterion = torch.nn.L1Loss().to(device)
optimizer = torch.optim.Adam(acc_predictor.parameters())
# the default value is zero
acc_predictor.base_acc.data += base_acc
for epoch in tqdm(range(10)):
    acc_predictor.train()
    for (data, label) in tqdm(train_loader, desc="Epoch%d" % (epoch + 1), position=0, leave=True):
        # step 1. Move the data and labels to device (cuda:0).
        data = data.to(device)
        label = label.to(device)
        ############### YOUR CODE STARTS HERE ###############
        # step 2. Run forward pass.
        pred = acc_predictor(data)
        # step 3. Calculate the loss.
        loss = criterion(pred, label)
        # step 4. Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        ################ YOUR CODE ENDS HERE ################

    acc_predictor.eval()
    with torch.no_grad():
        with tqdm(total=len(valid_loader), desc="Val", position=0, leave=True) as t:
            for (data, label) in valid_loader:
                # step 1. Move the data and labels to device (cuda:0).
                data = data.to(device)
                label = label.to(device)
                ############### YOUR CODE STARTS HERE ###############
                # step 2. Run forward pass.
                pred = acc_predictor(data)
                # step 3. Calculate the loss.
                loss = criterion(pred, label)
                ############### YOUR CODE ENDS HERE ###############
                t.set_postfix({"loss": loss.item()})
                t.update(1)

if not os.path.exists(acc_pred_checkpoint_path):
    torch.save(acc_predictor.cpu().state_dict(), acc_pred_checkpoint_path)
Epoch1: 100%|██████████| 157/157 [00:00<00:00, 362.86it/s]
Val: 100%|██████████| 40/40 [00:00<00:00, 109.00it/s, loss=0.00374]
Epoch2: 100%|██████████| 157/157 [00:00<00:00, 262.66it/s]
Val: 100%|██████████| 40/40 [00:00<00:00, 141.77it/s, loss=0.0026]
Epoch3: 100%|██████████| 157/157 [00:00<00:00, 241.87it/s]
Val: 100%|██████████| 40/40 [00:00<00:00, 118.13it/s, loss=0.00251]
Epoch4: 100%|██████████| 157/157 [00:00<00:00, 336.42it/s]
Val: 100%|██████████| 40/40 [00:00<00:00, 119.41it/s, loss=0.00259]
Epoch5: 100%|██████████| 157/157 [00:00<00:00, 331.75it/s]
Val: 100%|██████████| 40/40 [00:00<00:00, 117.39it/s, loss=0.00242]
Epoch6: 100%|██████████| 157/157 [00:00<00:00, 341.96it/s]
Val: 100%|██████████| 40/40 [00:00<00:00, 96.35it/s, loss=0.00235]
Epoch7: 100%|██████████| 157/157 [00:00<00:00, 321.68it/s]
Val: 100%|██████████| 40/40 [00:00<00:00, 122.19it/s, loss=0.0023]
Epoch8: 100%|██████████| 157/157 [00:00<00:00, 307.33it/s]
Val: 100%|██████████| 40/40 [00:00<00:00, 121.72it/s, loss=0.00178]
Epoch9: 100%|██████████| 157/157 [00:00<00:00, 329.76it/s]
Val: 100%|██████████| 40/40 [00:00<00:00, 119.59it/s, loss=0.00203]
Epoch10: 100%|██████████| 157/157 [00:00<00:00, 308.76it/s]
Val: 100%|██████████| 40/40 [00:00<00:00, 99.72it/s, loss=0.00195]
100%|██████████| 10/10 [00:08<00:00, 1.17it/s]
The plot below shows the correlation between the trained model's predictions and the ground-truth values: nicely "linear".
predicted_accuracies = []
ground_truth_accuracies = []
acc_predictor = acc_predictor.to("cuda:0")
acc_predictor.eval()
with torch.no_grad():
    with tqdm(total=len(valid_loader), desc="Val") as t:
        for (data, label) in valid_loader:
            data = data.to(device)
            label = label.to(device)
            pred = acc_predictor(data)
            predicted_accuracies += pred.cpu().numpy().tolist()
            ground_truth_accuracies += label.cpu().numpy().tolist()
            if len(predicted_accuracies) > 200:
                break
plt.scatter(predicted_accuracies, ground_truth_accuracies)
# draw y = x
min_acc, max_acc = min(predicted_accuracies), max(predicted_accuracies)
print(min_acc, max_acc)
plt.plot([min_acc, max_acc], [min_acc, max_acc], c="red", linewidth=2)
plt.xlabel("Predicted accuracy")
plt.ylabel("Measured accuracy")
plt.title("Correlation between predicted accuracy and real accuracy")
Val: 0%| | 0/40 [00:00<?, ?it/s]
0.8604847192764282 0.9356203079223633
Text(0.5, 1.0, 'Correlation between predicted accuracy and real accuracy')
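If you want a single number to go with the scatter plot, you can compute the correlation coefficient of the ~200 plotted pairs (a small optional addition, not part of the original lab):

print(np.corrcoef(predicted_accuracies, ground_truth_accuracies)[0, 1])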
Part 2. Neural Architecture Search
Finally, the last stage: let's find the model we want! We will use two search methods: one is random search, and the other is neural architecture search using evolutionary search (NAS at last!).
Question 5: Complete the following random search agent.
Random search is straightforward: keep collecting samples that satisfy the constraint, then pick the architecture with the best predicted accuracy.
class RandomSearcher:
    def __init__(self, efficiency_predictor, accuracy_predictor):
        self.efficiency_predictor = efficiency_predictor
        self.accuracy_predictor = accuracy_predictor

    def random_valid_sample(self, constraint):
        # randomly sample subnets until finding one that satisfies the constraint
        while True:
            sample = self.accuracy_predictor.arch_encoder.random_sample_arch()
            efficiency = self.efficiency_predictor.get_efficiency(sample)
            if self.efficiency_predictor.satisfy_constraint(efficiency, constraint):
                return sample, efficiency

    def run_search(self, constraint, n_subnets=100):
        subnet_pool = []
        # sample subnets
        for _ in tqdm(range(n_subnets)):
            sample, efficiency = self.random_valid_sample(constraint)
            subnet_pool.append(sample)
        # predict the accuracy of subnets
        accs = self.accuracy_predictor.predict_acc(subnet_pool)
        ############### YOUR CODE STARTS HERE ###############
        # hint: one line of code
        # get the index of the best subnet
        best_idx = accs.argmax()
        ############### YOUR CODE ENDS HERE #################
        # return the best subnet
        return accs[best_idx], subnet_pool[best_idx]
Question 6: Complete the following function.
Note: the subnet found under the MACs <= 100M constraint can score lower than the one found under MACs <= 50M; Prof. Han says this might not be intuitive. (Also double-check the constraint key's spelling: it must be millionMACs, matching the key returned by get_efficiency; otherwise satisfy_constraint silently skips the MACs constraint.)
def search_and_measure_acc(agent, constraint, **kwargs):
    ############### YOUR CODE STARTS HERE ###############
    # hint: call the search function
    best_info = agent.run_search(constraint=constraint, **kwargs)
    ############### YOUR CODE ENDS HERE #################
    # get searched subnet
    print("Best info: ", best_info)
    ofa_network.set_active_subnet(**best_info[1])
    subnet = ofa_network.get_active_subnet().to(device)
    # calibrate bn
    calib_bn(subnet, data_dir, 128, best_info[1]["image_size"])
    # build val loader
    val_loader = build_val_data_loader(data_dir, best_info[1]["image_size"], 128)
    # measure accuracy
    acc = validate(subnet, val_loader)
    # print best_info
    print(f"Accuracy of the selected subnet: {acc}")
    # visualize model architecture
    visualize_subnet(best_info[1])
    return acc, subnet
random.seed(1)
np.random.seed(1)
nas_agent = RandomSearcher(efficiency_predictor, acc_predictor)
# MACs-constrained search
# (the key must be spelled millionMACs to match get_efficiency's output)
subnets_rs_macs = {}
for millionMACs in [50, 100]:
    search_constraint = dict(millionMACs=millionMACs)
    print(f"Random search with constraint: MACs <= {millionMACs}M")
    subnets_rs_macs[millionMACs] = search_and_measure_acc(nas_agent, search_constraint, n_subnets=300)

# memory-constrained search
subnets_rs_memory = {}
for KBPeakMemory in [256, 512]:
    search_constraint = dict(KBPeakMemory=KBPeakMemory)
    print(f"Random search with constraint: Peak memory <= {KBPeakMemory}KB")
    subnets_rs_memory[KBPeakMemory] = search_and_measure_acc(nas_agent, search_constraint, n_subnets=300)
Random search with constraint: MACs <= 50M
Best info: (tensor(0.9327, device='cuda:0', grad_fn=<SelectBackward0>), {'ks': [5, 7, 5, 3, 3, 7, 5, 3, 7, 3, 3, 3, 7, 5, 5, 5, 7, 5, 3, 7], 'e': [4, 3, 3, 6, 4, 3, 6, 6, 4, 3, 3, 4, 4, 6, 6, 4, 3, 4, 4, 3], 'd': [2, 2, 1, 1, 2, 0], 'image_size': 160, 'wid': 1})
Accuracy of the selected subnet: 93.27543427346657
Random search with constraint: MACs <= 100M
Best info: (tensor(0.9329, device='cuda:0', grad_fn=<SelectBackward0>), {'ks': [3, 3, 5, 7, 7, 5, 5, 7, 7, 3, 5, 5, 5, 3, 7, 5, 7, 5, 5, 3], 'e': [4, 3, 6, 3, 3, 6, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 6, 6, 3], 'd': [2, 2, 1, 1, 2, 2], 'image_size': 160, 'wid': 1})
Accuracy of the selected subnet: 93.47394543971969
Random search with constraint: Peak memory <= 256KB
Best info: (tensor(0.9248, device='cuda:0', grad_fn=<SelectBackward0>), {'ks': [7, 7, 5, 7, 7, 5, 3, 5, 3, 5, 3, 5, 7, 3, 3, 5, 7, 7, 5, 3], 'e': [4, 4, 6, 4, 6, 3, 6, 4, 6, 6, 6, 4, 4, 4, 4, 6, 3, 6, 4, 4], 'd': [1, 1, 1, 1, 1, 1], 'image_size': 160, 'wid': 0})
Accuracy of the selected subnet: 92.8287840963889
Random search with constraint: Peak memory <= 512KB
Best info: (tensor(0.9328, device='cuda:0', grad_fn=<SelectBackward0>), {'ks': [3, 3, 3, 3, 5, 5, 5, 7, 5, 7, 5, 5, 7, 5, 7, 7, 7, 7, 3, 3], 'e': [4, 3, 6, 4, 6, 6, 4, 6, 4, 4, 4, 6, 4, 4, 3, 6, 4, 4, 3, 6], 'd': [2, 1, 2, 0, 2, 0], 'image_size': 160, 'wid': 1})
Accuracy of the selected subnet: 93.15136479455839
100%|██████████| 300/300 [00:19<00:00, 15.43it/s]
Validate: 100%|██████████| 32/32 [00:01<00:00, 30.42it/s, loss=0.187, top1=93.3]
100%|██████████| 300/300 [00:19<00:00, 15.05it/s]
Validate: 100%|██████████| 32/32 [00:01<00:00, 30.58it/s, loss=0.186, top1=93.5]
100%|██████████| 300/300 [00:43<00:00, 6.83it/s]
Validate: 100%|██████████| 32/32 [00:00<00:00, 34.29it/s, loss=0.204, top1=92.8]
100%|██████████| 300/300 [00:22<00:00, 13.54it/s]
Validate: 100%|██████████| 32/32 [00:01<00:00, 29.29it/s, loss=0.19, top1=93.2]
Question 7: Complete the following evolutionary search agent.
Evolutionary search adds one more step here, called "crossover": from two randomly picked samples, a child is built by taking each part of the architecture at random from one of the two parents. The number of sub-networks kept at a time is called the population; each round of sampling is a generation; and max_time_budget sets how many generations to run. In every generation the population is sorted (by predicted accuracy).
Now you have successfully implemented the random search algorithm. In this part, we will implement a more sample-efficient search algorithm: evolutionary search. Evolutionary search is inspired by the evolution algorithm (or genetic algorithm). A population of sub-networks is first sampled from the design space. Then, in each generation, we perform random mutation and crossover operations as shown in the figure above. The sub-networks with the highest accuracy are kept, and this process repeats until the number of generations reaches max_time_budget. Similar to random search, throughout the search process all sub-networks that cannot satisfy the efficiency constraint are discarded.
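Before reading the class below, here is a toy illustration of the two evolution operators on hypothetical architecture dicts (the real code applies them to full subnet configs via the arch encoder):

import copy
import random

parent1 = {"ks": [3, 5, 7], "e": [3, 4, 6], "image_size": 128}
parent2 = {"ks": [7, 7, 3], "e": [6, 3, 4], "image_size": 160}

# crossover: pick each field (or each list entry) from one of the two parents
child = copy.deepcopy(parent1)
for key in child:
    if isinstance(child[key], list):
        child[key] = [random.choice([a, b]) for a, b in zip(parent1[key], parent2[key])]
    else:
        child[key] = random.choice([parent1[key], parent2[key]])
print("crossover child:", child)

# mutation: with probability 0.1, resample each kernel size
mutated = copy.deepcopy(child)
mutated["ks"] = [random.choice([3, 5, 7]) if random.random() < 0.1 else k
                 for k in mutated["ks"]]
print("mutated child:  ", mutated)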
class EvolutionSearcher:
    def __init__(self, efficiency_predictor, accuracy_predictor, **kwargs):
        self.efficiency_predictor = efficiency_predictor
        self.accuracy_predictor = accuracy_predictor
        # evolution hyper-parameters
        self.arch_mutate_prob = kwargs.get("arch_mutate_prob", 0.1)
        self.resolution_mutate_prob = kwargs.get("resolution_mutate_prob", 0.5)
        self.population_size = kwargs.get("population_size", 100)
        self.max_time_budget = kwargs.get("max_time_budget", 500)
        self.parent_ratio = kwargs.get("parent_ratio", 0.25)
        self.mutation_ratio = kwargs.get("mutation_ratio", 0.5)

    def update_hyper_params(self, new_param_dict):
        self.__dict__.update(new_param_dict)

    def random_valid_sample(self, constraint):
        # randomly sample subnets until finding one that satisfies the constraint
        while True:
            sample = self.accuracy_predictor.arch_encoder.random_sample_arch()
            efficiency = self.efficiency_predictor.get_efficiency(sample)
            if self.efficiency_predictor.satisfy_constraint(efficiency, constraint):
                return sample, efficiency

    def mutate_sample(self, sample, constraint):
        while True:
            new_sample = copy.deepcopy(sample)
            self.accuracy_predictor.arch_encoder.mutate_resolution(new_sample, self.resolution_mutate_prob)
            self.accuracy_predictor.arch_encoder.mutate_width(new_sample, self.arch_mutate_prob)
            self.accuracy_predictor.arch_encoder.mutate_arch(new_sample, self.arch_mutate_prob)
            efficiency = self.efficiency_predictor.get_efficiency(new_sample)
            if self.efficiency_predictor.satisfy_constraint(efficiency, constraint):
                return new_sample, efficiency

    def crossover_sample(self, sample1, sample2, constraint):
        while True:
            new_sample = copy.deepcopy(sample1)
            for key in new_sample.keys():
                if not isinstance(new_sample[key], list):
                    ############### YOUR CODE STARTS HERE ###############
                    # hint: randomly choose the value from sample1[key] and sample2[key], random.choice
                    new_sample[key] = random.choice([sample1[key], sample2[key]])
                    ############### YOUR CODE ENDS HERE #################
                else:
                    for i in range(len(new_sample[key])):
                        ############### YOUR CODE STARTS HERE ###############
                        new_sample[key][i] = random.choice([sample1[key][i], sample2[key][i]])
                        ############### YOUR CODE ENDS HERE #################
            efficiency = self.efficiency_predictor.get_efficiency(new_sample)
            if self.efficiency_predictor.satisfy_constraint(efficiency, constraint):
                return new_sample, efficiency

    def run_search(self, constraint, **kwargs):
        self.update_hyper_params(kwargs)
        mutation_numbers = int(round(self.mutation_ratio * self.population_size))
        parents_size = int(round(self.parent_ratio * self.population_size))

        best_valids = [-100]
        population = []  # (acc, sample) tuples
        child_pool = []
        best_info = None
        # generate random population
        for _ in range(self.population_size):
            sample, efficiency = self.random_valid_sample(constraint)
            child_pool.append(sample)

        accs = self.accuracy_predictor.predict_acc(child_pool)
        for i in range(self.population_size):
            population.append((accs[i].item(), child_pool[i]))

        # evolving the population
        with tqdm(total=self.max_time_budget) as t:
            for i in range(self.max_time_budget):
                ############### YOUR CODE STARTS HERE ###############
                # hint: sort the population according to the acc (descending order)
                population = sorted(population, key=lambda x: x[0], reverse=True)
                ############### YOUR CODE ENDS HERE #################
                ############### YOUR CODE STARTS HERE ###############
                # hint: keep topK samples in the population, K = parents_size
                # the others are discarded.
                population = population[:parents_size]
                ############### YOUR CODE ENDS HERE #################
                # update best info
                acc = population[0][0]
                if acc > best_valids[-1]:
                    best_valids.append(acc)
                    best_info = population[0]
                else:
                    best_valids.append(best_valids[-1])

                child_pool = []
                for j in range(mutation_numbers):
                    # randomly choose a sample
                    par_sample = population[np.random.randint(parents_size)][1]
                    # mutate this sample
                    new_sample, efficiency = self.mutate_sample(par_sample, constraint)
                    child_pool.append(new_sample)
                for j in range(self.population_size - mutation_numbers):
                    # randomly choose two samples
                    par_sample1 = population[np.random.randint(parents_size)][1]
                    par_sample2 = population[np.random.randint(parents_size)][1]
                    # crossover
                    new_sample, efficiency = self.crossover_sample(
                        par_sample1, par_sample2, constraint
                    )
                    child_pool.append(new_sample)
                # predict accuracy with the accuracy predictor
                accs = self.accuracy_predictor.predict_acc(child_pool)
                for j in range(self.population_size):
                    population.append((accs[j].item(), child_pool[j]))
                t.update(1)

        return best_info
Question 8: Run evolutionary search and tune evo_params to optimize the results. Describe your findings.
What remains is experiment, experiment, experiment. Let's observe!
Answer:
- The default population size and time budget are too small. Increasing them effectively improves the final results, but it also increases the search cost.
- Increasing the probability of resolution mutation can improve the final results. (hint: Question 1)
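To observe this systematically, here is a minimal sweep sketch along the lines of the answer above (illustrative, not part of the assignment; it compares predicted accuracy only, so it stays fast):

for population_size, max_time_budget in [(10, 10), (50, 20), (100, 50)]:
    random.seed(1)
    np.random.seed(1)
    agent = EvolutionSearcher(
        efficiency_predictor, acc_predictor,
        population_size=population_size, max_time_budget=max_time_budget,
    )
    best_acc, _ = agent.run_search(dict(millionMACs=50))
    print(f"pop={population_size:4d}, budget={max_time_budget:3d}, "
          f"predicted acc={best_acc * 100:.2f}%")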
random.seed(1)
np.random.seed(1)

evo_params = {
    'arch_mutate_prob': 0.1,  # The probability of architecture mutation in evolutionary search
    'resolution_mutate_prob': 0.1,  # The probability of resolution mutation in evolutionary search
    'population_size': 10,  # The size of the population
    'max_time_budget': 10,
    'parent_ratio': 0.1,
    'mutation_ratio': 0.1,
}

nas_agent = EvolutionSearcher(efficiency_predictor, acc_predictor, **evo_params)
# MACs-constrained search
subnets_evo_macs = {}
for millionMACs in [50, 100]:
    search_constraint = dict(millionMACs=millionMACs)
    print(f"Evolutionary search with constraint: MACs <= {millionMACs}M")
    subnets_evo_macs[millionMACs] = search_and_measure_acc(nas_agent, search_constraint)

# memory-constrained search
subnets_evo_memory = {}
for KBPeakMemory in [256, 512]:
    search_constraint = dict(KBPeakMemory=KBPeakMemory)
    print(f"Evolutionary search with constraint: Peak memory <= {KBPeakMemory}KB")
    subnets_evo_memory[KBPeakMemory] = search_and_measure_acc(nas_agent, search_constraint)
Evolutionary search with constraint: MACs <= 50M
Accuracy of the selected subnet: 92.28287844220107
Evolutionary search with constraint: MACs <= 100M
Accuracy of the selected subnet: 92.33250616939725
Evolutionary search with constraint: Peak memory <= 256KB
Accuracy of the selected subnet: 92.45657571267253
Evolutionary search with constraint: Peak memory <= 512KB
Accuracy of the selected subnet: 93.05210921143184
100%|██████████| 10/10 [00:05<00:00, 1.72it/s]
Validate: 100%|██████████| 32/32 [00:00<00:00, 35.68it/s, loss=0.214, top1=92.3]
100%|██████████| 10/10 [00:07<00:00, 1.42it/s]
Validate: 100%|██████████| 32/32 [00:00<00:00, 36.25it/s, loss=0.206, top1=92.3]
100%|██████████| 10/10 [00:07<00:00, 1.35it/s]
Validate: 100%|██████████| 32/32 [00:00<00:00, 35.39it/s, loss=0.21, top1=92.5]
100%|██████████| 10/10 [00:05<00:00, 1.74it/s]
Validate: 100%|██████████| 32/32 [00:00<00:00, 32.62it/s, loss=0.193, top1=93.1]
Question 9: Run evolutionary search under real-world constraints.
In real-world applications, we may have multiple efficiency constraints: https://blog.tensorflow.org/2019/10/visual-wake-words-with-tensorflow-lite_30.html. Use evolutionary search to find models that satisfy the following constraints:
- [15 pts] 250 KB, 60M MACs (acc >= 92.5% to get the full credit)
- [10 pts, bonus] 200KB, 30M MACs (acc >= 90% to get the full credit)
Hint: You do not have to use the same evo_params for these two tasks.
random.seed(1)
np.random.seed(1)
evo_params = {
    'arch_mutate_prob': 0.1,  # The probability of architecture mutation in evolutionary search
    'resolution_mutate_prob': 0.5,  # The probability of resolution mutation in evolutionary search
    'population_size': 50,  # The size of the population
    'max_time_budget': 20,
    'parent_ratio': 0.25,
    'mutation_ratio': 0.3,
}

nas_agent = EvolutionSearcher(efficiency_predictor, acc_predictor, **evo_params)

(millionMACs, KBPeakMemory) = [60, 250]
print(f"Evolution search with constraint: MACs <= {millionMACs}M, peak memory <= {KBPeakMemory}KB")
search_and_measure_acc(nas_agent, dict(millionMACs=millionMACs, KBPeakMemory=KBPeakMemory))
print("Evolution search finished!")
Evolution search with constraint: MACs <= 60M, peak memory <= 250KB
Accuracy of the selected subnet: 92.60545903435415
Evolution search finished!
100%|██████████| 20/20 [01:13<00:00, 3.66s/it]
Validate: 100%|██████████| 32/32 [00:00<00:00, 36.77it/s, loss=0.201, top1=92.6]
random.seed(1)
np.random.seed(1)
evo_params = {
    'arch_mutate_prob': 0.1,  # The probability of architecture mutation in evolutionary search
    'resolution_mutate_prob': 0.5,  # The probability of resolution mutation in evolutionary search
    'population_size': 50,  # The size of the population
    'max_time_budget': 30,
    'parent_ratio': 0.25,
    'mutation_ratio': 0.3,
}
nas_agent = EvolutionSearcher(efficiency_predictor, acc_predictor, **evo_params)

(millionMACs, KBPeakMemory) = [30, 200]
print(f"Evolution search with constraint: MACs <= {millionMACs}M, peak memory <= {KBPeakMemory}KB")
search_and_measure_acc(nas_agent, dict(millionMACs=millionMACs, KBPeakMemory=KBPeakMemory))
print("Evolution search finished!")
Evolution search with constraint: MACs <= 30M, peak memory <= 200KB
Accuracy of the selected subnet: 90.54590573748644
Evolution search finished!
100%|██████████| 30/30 [01:45<00:00, 3.52s/it]
Validate: 100%|██████████| 32/32 [00:00<00:00, 37.25it/s, loss=0.241, top1=90.5]
random.seed(1)
np.random.seed(1)
evo_params = {
    'arch_mutate_prob': 0.1,  # The probability of architecture mutation in evolutionary search
    'resolution_mutate_prob': 0.5,  # The probability of resolution mutation in evolutionary search
    'population_size': 50,  # The size of the population
    'max_time_budget': 20,
    'parent_ratio': 0.25,
    'mutation_ratio': 0.3,
}

nas_agent = EvolutionSearcher(efficiency_predictor, acc_predictor, **evo_params)

(millionMACs, KBPeakMemory) = [15, 256]
print(f"Evolution search with constraint: MACs <= {millionMACs}M, peak memory <= {KBPeakMemory}KB")
search_and_measure_acc(nas_agent, dict(millionMACs=millionMACs, KBPeakMemory=KBPeakMemory))
print("Evolution search finished!")
Evolution search with constraint: MACs <= 15M, peak memory <= 256KB
Best info: (0.8773346543312073, {'ks': [7, 5, 5, 3, 5, 3, 5, 7, 5, 5, 7, 5, 3, 7, 3, 5, 3, 3, 7, 5], 'e': [4, 6, 6, 6, 4, 3, 3, 4, 3, 4, 4, 6, 6, 4, 6, 4, 3, 4, 3, 3], 'd': [0, 1, 0, 1, 1, 1], 'image_size': 96, 'wid': 0})
Accuracy of the selected subnet: 86.35235731631296
Evolution search finished!
100%|██████████| 20/20 [01:29<00:00, 4.48s/it]
Validate: 100%|██████████| 32/32 [00:00<00:00, 45.67it/s, loss=0.319, top1=86.4]
random.seed(1)
np.random.seed(1)
evo_params = {
    'arch_mutate_prob': 0.1,  # The probability of architecture mutation in evolutionary search
    'resolution_mutate_prob': 0.5,  # The probability of resolution mutation in evolutionary search
    'population_size': 50,  # The size of the population
    'max_time_budget': 20,
    'parent_ratio': 0.25,
    'mutation_ratio': 0.3,
}

nas_agent = EvolutionSearcher(efficiency_predictor, acc_predictor, **evo_params)

(millionMACs, KBPeakMemory) = [60, 64]
print(f"Evolution search with constraint: MACs <= {millionMACs}M, peak memory <= {KBPeakMemory}KB")
search_and_measure_acc(nas_agent, dict(millionMACs=millionMACs, KBPeakMemory=KBPeakMemory))
print("Evolution search finished!")
Evolution search with constraint: MACs <= 60M, peak memory <= 64KB
KeyboardInterrupt:
random.seed(1)
np.random.seed(1)
evo_params = {
    'arch_mutate_prob': 0.1,  # The probability of architecture mutation in evolutionary search
    'resolution_mutate_prob': 0.5,  # The probability of resolution mutation in evolutionary search
    'population_size': 50,  # The size of the population
    'max_time_budget': 20,
    'parent_ratio': 0.25,
    'mutation_ratio': 0.3,
}

nas_agent = EvolutionSearcher(efficiency_predictor, acc_predictor, **evo_params)

(millionMACs, KBPeakMemory) = [10, 64]
print(f"Evolution search with constraint: MACs <= {millionMACs}M, peak memory <= {KBPeakMemory}KB")
search_and_measure_acc(nas_agent, dict(millionMACs=millionMACs, KBPeakMemory=KBPeakMemory))
print("Evolution search finished!")
Evolution search with constraint: MACs <= 10M, peak memory <= 64KB
Question 10: Is it possible to find a subnet with the following efficiency constraints in the current design space?
- A: The activation size of the subnet is at most 256KB and the MACs of the subnet is at most 15M.
- B: The activation size of the subnet is at most 64 KB.
Answer:
- A: Yes. The search above with MACs <= 15M and peak memory <= 256KB found a valid subnet (86.4% accuracy).
- B: No. The smallest input resolution in the design space is 96, and even the smallest subnet at resolution 96 already has a 72 KB peak activation size (see the efficiency predictor test above), so no subnet in this design space fits a 64 KB activation budget. That is also why the 64 KB searches above never terminate: random_valid_sample loops forever looking for a feasible sample.