Table of Contents¶

Package Setup
Fetch data
Imports
Data explore
Dataset
Model
Transfer learn
- Find LR
- Train
Finetune
- Find LR
- Train
Test
Visualize

Package setup ¶

%%capture
!pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html

%%capture 
!pip install pytorch-lightning einops

!pip install "opencv-python-headless<4.3"

# %%capture
!pip install albumentations==0.5.0

Fetch data ¶

Using the playing-card detection dataset from the howl0893 GitHub repo (custom-object-detection-datasets).

!mkdir data

%cd data

/content/data

!git clone https://github.com/howl0893/custom-object-detection-datasets.git

Cloning into 'custom-object-detection-datasets'...
remote: Enumerating objects: 75, done.
remote: Counting objects: 100% (75/75), done.
remote: Compressing objects: 100% (74/74), done.
remote: Total 1060 (delta 39), reused 0 (delta 0), pack-reused 985
Receiving objects: 100% (1060/1060), 129.20 MiB | 41.63 MiB/s, done.
Resolving deltas: 100% (444/444), done.

%cd /content/data/custom-object-detection-datasets/datasets/cards/images/

/content/data/custom-object-detection-datasets/datasets/cards/images

Imports ¶

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision as V

import pandas as pd

from PIL import Image, ImageDraw

import numpy as np

import albumentations as A
import cv2

import pytorch_lightning as pl

!pwd

/content/data/custom-object-detection-datasets/datasets/cards/images

!ls

lightning_logs	test  test_labels.csv  train  train_labels.csv

Data explore ¶

# Peek at the raw CSV text. The context manager closes the file handle
# immediately instead of leaving it open until garbage collection.
with open('train_labels.csv') as fh:
  f = fh.read()

# One row per annotated box: filename, image size, class name and box corners.
df = pd.read_csv('train_labels.csv')

df

df[(df.filename == 'cam_image35.jpg')]

# Load the sample image so the boxes listed above can be drawn onto it
# (`img` was previously used here without ever being defined).
img = Image.open('train/cam_image35.jpg')
draw = ImageDraw.Draw(img)

for _, row in df[(df.filename == 'cam_image35.jpg')].iterrows():
  draw.rectangle([row.xmin, row.ymin, row.xmax, row.ymax], outline='red')

img

Dataset ¶

# Card class name -> integer label id used in the detection targets.
# Ids run 1..6 (0 is unused); Net builds its model with len(classes) + 1 outputs.
classes = {'ace': 5, 'jack': 4, 'king': 3, 'nine': 2, 'queen': 6, 'ten': 1}

class CardDS(Dataset):
    """Playing-card detection dataset backed by the train/test label CSVs.

    Each item is one image, resized to 512x512, together with all of its
    boxes. ``__getitem__`` returns ``(image_tensor, target)`` where ``target``
    is a dict with ``'labels'`` (int64, shape ``[N]``) and ``'boxes'``
    (float32, shape ``[N, 4]``, ordered ``xmin, ymin, xmax, ymax``).

    Args:
        train: When true, read ``train_labels.csv`` and images under
            ``train/``; otherwise ``test_labels.csv`` and ``test/``. The
            random horizontal flip is applied to the training split only, so
            evaluation on the test split is deterministic.
    """

    def __init__(self, train = True):
        self.train = train
        self.df = pd.read_csv('train_labels.csv' if train else 'test_labels.csv')
        # The CSV holds one row per box; the dataset is indexed per image.
        self.files = self.df.filename.unique()

        steps = [A.Resize(512, 512)]
        if train:
            # Augment only while training (flip first, then resize).
            steps.insert(0, A.HorizontalFlip(p=0.5))
        self.transform = A.Compose(steps, bbox_params=A.BboxParams(format='pascal_voc'))

    def __len__(self):
        """Return the number of distinct image files in the split."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load image ``idx`` and its boxes, apply the transform, build tensors."""
        f = self.files[idx]
        folder = 'train' if self.train else 'test'
        # Force 3-channel RGB so grayscale/RGBA files still give an HxWx3
        # array, and close the underlying file as soon as pixels are read.
        with Image.open(f'{folder}/{f}') as pil_img:
            img = np.array(pil_img.convert('RGB'))

        # The class id rides along as the 5th element of every box so the
        # transform keeps boxes and labels aligned.
        rows = self.df[self.df.filename == f]
        annots = [
            (xmin, ymin, xmax, ymax, classes[name])
            for xmin, ymin, xmax, ymax, name in zip(
                rows.xmin, rows.ymin, rows.xmax, rows.ymax, rows['class']
            )
        ]

        out = self.transform(image=img, bboxes=annots)
        img = V.transforms.functional.to_tensor(out['image'])
        target = {
            'labels': torch.tensor([b[-1] for b in out['bboxes']], dtype=torch.int64),
            # reshape keeps a well-formed (0, 4) tensor when no boxes remain.
            'boxes': torch.tensor(
                [b[:4] for b in out['bboxes']], dtype=torch.float32
            ).reshape(-1, 4),
        }

        return img, target

ds = CardDS()

len(ds)

296

ds.df['class'].value_counts()

king     95
ace      92
nine     89
jack     89
ten      82
queen    80
Name: class, dtype: int64

idx = np.random.randint(len(ds))

# CardDS.__getitem__ returns an (image_tensor, target_dict) pair, so unpack
# it rather than indexing the result with 'image' / 'bboxes'.
img, target = ds[idx]
annots = target['boxes'].tolist()
# The image is a CHW float tensor; convert it back to PIL for drawing.
img = V.transforms.functional.to_pil_image(img)
draw = ImageDraw.Draw(img)
for a in annots:
  draw.rectangle(a[:4], outline='red')
img

annots

[(0, (84, 75, 166, 191)),
 (4, (267, 92, 348, 204)),
 (4, (41, 329, 127, 434)),
 (1, (189, 365, 294, 440))]

Model ¶

def collate(batch):
  """Transpose a batch of per-sample tuples into one tuple per field.

  Passed as ``collate_fn`` to the DataLoaders built in ``Net``, so a batch of
  ``(img, target)`` samples becomes ``((img, ...), (target, ...))``.
  An empty batch yields an empty tuple.
  """
  fields = zip(*batch)
  return tuple(fields)

class Net(pl.LightningModule):
    """RetinaNet (ResNet-50 FPN) card detector wrapped as a LightningModule.

    The loss used for both training and validation is the sum of the
    ``'bbox_regression'`` and ``'classification'`` entries the wrapped model
    returns when called with targets in training mode.

    Attributes:
        model: The torchvision RetinaNet instance.
        lr: Learning rate read by ``configure_optimizers``; it is reassigned
            from outside (``net.lr = ...``) before each training run.
    """

    def __init__(self):
        super().__init__()
        # Label ids in `classes` start at 1, hence the extra output slot.
        self.model = V.models.detection.retinanet_resnet50_fpn(num_classes=len(classes) + 1, pretrained_backbone=True)
        self.lr = 1e-3

    def freeze(self):
        """Disable gradients everywhere except the detection head."""
        # NOTE(review): this shadows the freeze() that LightningModule
        # provides - confirm nothing relies on the base-class behaviour.
        for p in self.model.parameters():
            p.requires_grad = False

        for p in self.model.head.parameters():
            p.requires_grad = True

    def unfreeze(self):
        """Re-enable gradients for every parameter of the model."""
        for p in self.model.parameters():
            p.requires_grad = True

    def forward(self, x, y):
        """Call the wrapped model with images ``x`` and targets ``y``."""
        return self.model(x, y)

    def _total_loss(self, x, y):
        """Return regression + classification loss for one batch."""
        out = self(x, y)
        return out['bbox_regression'] + out['classification']

    def training_step(self, batch, batch_id):
        """Compute, log and return the training loss for one batch."""
        x, y = batch
        loss = self._total_loss(x, y)
        self.log('train loss', loss.item())
        return loss

    def validation_step(self, batch, batch_nb):
        """Compute the validation loss for one batch without gradients."""
        x, y = batch
        # The loss dict is only produced in training mode, so switch to it
        # for the forward pass and then restore whatever mode was active.
        was_training = self.training
        self.train()
        try:
            with torch.no_grad():
                loss = self._total_loss(x, y)
        finally:
            self.train(was_training)

        return {'val_loss': loss}

    def validation_epoch_end(self, outputs):
        """Log the mean validation loss over the epoch.

        Returns nothing: Lightning warns that ``validation_epoch_end`` should
        not return a value and that results should go through ``self.log``.
        """
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        self.log('val_loss', avg_loss)

    def configure_optimizers(self):
        """Build SGD (momentum 0.9) with a per-step cosine warm-restart schedule."""
        opt = torch.optim.SGD(self.parameters(), lr=self.lr, momentum=0.9)
        # 296 training images / batch size 8 = 37 steps per epoch, so the
        # first restart period spans 3 epochs' worth of optimizer steps.
        sched = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(opt, 37*3)
        return [opt], [{
            'scheduler': sched,
            'interval': 'step',  # step the scheduler every batch, not every epoch
        }]

    def train_dataloader(self):
        """Shuffled loader over the training split, batch size 8."""
        ds = CardDS()
        return DataLoader(ds, 8, collate_fn=collate, shuffle=True)

    def val_dataloader(self):
        """Unshuffled loader over the test split, batch size 8."""
        ds = CardDS(train=False)
        return DataLoader(ds, 8, collate_fn=collate)

net = Net()

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth

Transfer learn ¶

net.freeze()

Find LR ¶

torch.cuda.is_available()

True

trainer = pl.Trainer(gpus=1)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

# Run Lightning's learning-rate finder on the head-only (frozen) model and
# plot the result; the chosen value is assigned to net.lr in a later cell.
lrf = trainer.tuner.lr_find(net, num_training=50)  ## changed
fig = lrf.plot(suggest=True)      ## changed
fig.show()

  | Name  | Type      | Params
------------------------------------
0 | model | RetinaNet | 32 M  
/usr/local/lib/python3.6/dist-packages/torch/nn/_reduction.py:44: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.
  warnings.warn(warning.format(ret))
/usr/local/lib/python3.6/dist-packages/pytorch_lightning/utilities/distributed.py:45: UserWarning: The validation_epoch_end should not return anything as of 9.1.to log, use self.log(...) or self.write(...) directly in the LightningModule
  warnings.warn(*args, **kwargs)

Train ¶

net.lr = 3e-2

trainer = pl.Trainer(gpus=1, log_every_n_steps=1, callbacks=[pl.callbacks.LearningRateMonitor('step')])  ## changed

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

# Train for 30 epochs with only the detection head trainable
# (net.freeze() was called before this run).
trainer.max_epochs = 30
trainer.fit(net)

# Save the weights; the fine-tune stage reloads them with Net.load_from_checkpoint.
trainer.save_checkpoint('txfr.pt', weights_only=True)

Finetune ¶

net = Net.load_from_checkpoint('txfr.pt')

net.unfreeze()

Find LR ¶

trainer = pl.Trainer(gpus=1)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

# Repeat the learning-rate search on the unfrozen model, capping the sweep
# at 3e-2 (the rate used for the head-only stage).
lrf = trainer.tuner.lr_find(net, num_training=50, max_lr=3e-2)  ## changed
fig = lrf.plot(suggest=True)      ## changed
fig.show()

  | Name  | Type      | Params
------------------------------------
0 | model | RetinaNet | 32 M  
/usr/local/lib/python3.6/dist-packages/torch/nn/_reduction.py:44: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.
  warnings.warn(warning.format(ret))
/usr/local/lib/python3.6/dist-packages/pytorch_lightning/utilities/distributed.py:45: UserWarning: The validation_epoch_end should not return anything as of 9.1.to log, use self.log(...) or self.write(...) directly in the LightningModule
  warnings.warn(*args, **kwargs)

fig.gca().set_ylim(1.0, 1.5)
fig

Train ¶

net.lr = 3e-3

trainer = pl.Trainer(gpus=1, log_every_n_steps=1, callbacks=[pl.callbacks.LearningRateMonitor('step')])

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

# Fine-tune the whole network (net.unfreeze() was called above) for 25 epochs.
trainer.max_epochs = 25
trainer.fit(net)

Test ¶

%%capture
net.eval()
net.cuda()

# Pick a random row of the test label table and load the image it refers to.
df = pd.read_csv('test_labels.csv')

# NOTE(review): this samples a label row, not a unique image - if the test
# CSV has one row per box like the training CSV, images with more boxes are
# picked more often.
idx = np.random.randint(len(df))

img = Image.open(f'test/{df.filename.loc[idx]}')

# Resize for display/inference (note: CardDS resizes to 512x512 instead).
img = img.resize((456, 456))
img

# PIL image -> float tensor, as done inside CardDS.__getitem__.
inp = V.transforms.functional.to_tensor(img)

inp.shape

torch.Size([3, 456, 456])

# Call the wrapped torchvision model directly with a one-image batch; after
# net.eval() it returns one dict per image with 'boxes', 'scores', 'labels'.
with torch.no_grad():
  out = net.model([inp.cuda()])

# Non-maximum suppression with threshold 0.25. Labels are not passed in, so
# overlapping boxes suppress each other regardless of predicted class.
outidx = V.ops.nms(out[0]['boxes'], out[0]['scores'], 0.25)

outidx

tensor([106,  39,  40, 109, 157, 118, 168, 169, 170, 180,  71],
       device='cuda:0')

out[0]['scores'][outidx][:15]

tensor([0.3338, 0.3089, 0.2794, 0.2726, 0.1045, 0.0925, 0.0765, 0.0742, 0.0721,
        0.0613, 0.0554], device='cuda:0')

out[0]['labels'][outidx][:15]

tensor([4, 2, 2, 4, 5, 4, 5, 5, 5, 5, 2], device='cuda:0')

# Draw onto the resized test image.
draw = ImageDraw.Draw(img)

# Only the first 4 boxes kept by NMS are drawn; no score threshold is applied.
for row in out[0]['boxes'][outidx][:4]:
  draw.rectangle(list(row.cpu().numpy()), outline='red')

img

classes

{'ace': 5, 'jack': 4, 'king': 3, 'nine': 2, 'queen': 6, 'ten': 1}

Visualize ¶

%load_ext tensorboard
%tensorboard --logdir ./lightning_logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard

	filename	width	height	class	xmin	ymin	xmax	ymax
0	cam_image1.jpg	480	270	queen	173	24	260	137
1	cam_image1.jpg	480	270	queen	165	135	253	251
2	cam_image1.jpg	480	270	ten	255	96	337	208
3	cam_image10.jpg	960	540	ten	501	116	700	353
4	cam_image10.jpg	960	540	queen	261	124	453	370
...	...	...	...	...	...	...	...	...
522	IMG_2700.JPG	378	504	ace	74	105	303	425
523	IMG_2701.JPG	378	504	jack	64	102	313	453
524	IMG_2702.JPG	378	504	nine	85	133	290	419
525	IMG_2703.JPG	378	504	ace	82	152	277	422
526	IMG_2704.JPG	378	504	jack	70	110	301	426

	filename	width	height	class	xmin	ymin	xmax	ymax
33	cam_image35.jpg	960	540	nine	205	13	386	230
34	cam_image35.jpg	960	540	ace	189	253	388	517
35	cam_image35.jpg	960	540	jack	413	254	623	526
36	cam_image35.jpg	960	540	king	428	6	609	229