Package setup

In [ ]:
%%capture
!pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
In [ ]:
%%capture 
!pip install pytorch-lightning einops
In [ ]:
!pip install "opencv-python-headless<4.3"
In [ ]:
# %%capture
!pip install albumentations==0.5.0

Fetch data

Using card detection data from howl0893 github repo

In [ ]:
!mkdir data
In [ ]:
%cd data
/content/data
In [ ]:
!git clone https://github.com/howl0893/custom-object-detection-datasets.git
Cloning into 'custom-object-detection-datasets'...
remote: Enumerating objects: 75, done.
remote: Counting objects: 100% (75/75), done.
remote: Compressing objects: 100% (74/74), done.
remote: Total 1060 (delta 39), reused 0 (delta 0), pack-reused 985
Receiving objects: 100% (1060/1060), 129.20 MiB | 41.63 MiB/s, done.
Resolving deltas: 100% (444/444), done.
In [ ]:
%cd /content/data/custom-object-detection-datasets/datasets/cards/images/
/content/data/custom-object-detection-datasets/datasets/cards/images

Imports

In [ ]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision as V
In [ ]:
import pandas as pd
In [ ]:
from PIL import Image, ImageDraw
In [ ]:
import numpy as np
In [ ]:
import albumentations as A
import cv2
In [ ]:
import pytorch_lightning as pl
In [ ]:
!pwd
/content/data/custom-object-detection-datasets/datasets/cards/images
In [ ]:
!ls
lightning_logs	test  test_labels.csv  train  train_labels.csv

Data explore

In [ ]:
f = open('train_labels.csv').read()
In [ ]:
df = pd.read_csv('train_labels.csv')
In [ ]:
df
Out[ ]:
filename width height class xmin ymin xmax ymax
0 cam_image1.jpg 480 270 queen 173 24 260 137
1 cam_image1.jpg 480 270 queen 165 135 253 251
2 cam_image1.jpg 480 270 ten 255 96 337 208
3 cam_image10.jpg 960 540 ten 501 116 700 353
4 cam_image10.jpg 960 540 queen 261 124 453 370
... ... ... ... ... ... ... ... ...
522 IMG_2700.JPG 378 504 ace 74 105 303 425
523 IMG_2701.JPG 378 504 jack 64 102 313 453
524 IMG_2702.JPG 378 504 nine 85 133 290 419
525 IMG_2703.JPG 378 504 ace 82 152 277 422
526 IMG_2704.JPG 378 504 jack 70 110 301 426

527 rows × 8 columns

In [ ]:
df[(df.filename == 'cam_image35.jpg')]
Out[ ]:
filename width height class xmin ymin xmax ymax
33 cam_image35.jpg 960 540 nine 205 13 386 230
34 cam_image35.jpg 960 540 ace 189 253 388 517
35 cam_image35.jpg 960 540 jack 413 254 623 526
36 cam_image35.jpg 960 540 king 428 6 609 229
In [ ]:
draw = ImageDraw.Draw(img)
In [ ]:
for row in df[(df.filename == 'cam_image35.jpg')].iterrows():
  # print(row[1])
  draw.rectangle([row[1].xmin, row[1].ymin, row[1].xmax, row[1].ymax], outline='red')
In [ ]:
img
Out[ ]:

Dataset

In [ ]:
classes = {'ace': 5, 'jack': 4, 'king': 3, 'nine': 2, 'queen': 6, 'ten': 1}
In [ ]:
class CardDS(Dataset):
    def __init__(self, train = True):
      self.train = train
      if train:
        self.df = pd.read_csv('train_labels.csv')
      else:
        self.df = pd.read_csv('test_labels.csv')
      self.files = self.df.filename.unique()
      self.transform = A.Compose([
          A.HorizontalFlip(p=0.5),
          A.Resize(512, 512)
      ], bbox_params=A.BboxParams(format='pascal_voc'))

    def __len__(self):
      return len(self.files)

    def __getitem__(self, idx):
      f = self.files[idx]
      if self.train:
        img = Image.open(f'train/{f}')
        # img = V.io.read_image(f'train/{f}')
      else:
        img = Image.open(f'test/{f}')
        # img = V.io.read_image(f'test/{f}')
      img = np.array(img)

      annots = []
      for row in self.df[self.df.filename == f].iterrows():
        # print(row)
        annots += [(row[1].xmin, row[1].ymin, row[1].xmax, row[1].ymax, classes[row[1]['class']])]
      
      out = self.transform(image=img, bboxes=annots)
      img = V.transforms.functional.to_tensor(out['image'])
      out['labels'] = torch.Tensor([x[-1] for x in out['bboxes']]).long()
      out['boxes'] = torch.Tensor([x[:4] for x in out['bboxes']])
      out.pop('image')
      out.pop('bboxes')

      return img, out
In [ ]:
ds = CardDS()
In [ ]:
len(ds)
Out[ ]:
296
In [ ]:
ds.df['class'].value_counts()
Out[ ]:
king     95
ace      92
nine     89
jack     89
ten      82
queen    80
Name: class, dtype: int64
In [ ]:
idx = np.random.randint(len(ds))

out = ds[idx]
img, annots = out['image'], out['bboxes']
img.shape
img = Image.fromarray(img)
annots
draw = ImageDraw.Draw(img)
for a in annots:
  draw.rectangle(a[:4], outline='red')
img
In [ ]:
annots
Out[ ]:
[(0, (84, 75, 166, 191)),
 (4, (267, 92, 348, 204)),
 (4, (41, 329, 127, 434)),
 (1, (189, 365, 294, 440))]

Model

In [ ]:
def collate(batch):
  return tuple(zip(*batch))
In [ ]:
class Net(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.model = V.models.detection.retinanet_resnet50_fpn(num_classes=len(classes) + 1, pretrained_backbone=True)
        self.lr = 1e-3

    def freeze(self):
        for p in self.model.parameters():
          p.requires_grad = False
        
        for p in self.model.head.parameters():
          p.requires_grad = True

    def unfreeze(self):
        for p in self.model.parameters():
          p.requires_grad = True
        
    def forward(self, x, y):
        out = self.model(x, y)
        return out
    
    def training_step(self, batch, batch_id):
        x, y = batch
        out = self(x, y)
        loss = out['bbox_regression'] + out['classification']
        self.log('train loss', loss.item())
        return loss
    
    def validation_step(self, batch, batch_nb):
        # OPTIONAL
        x, y = batch
        self.train()
        with torch.no_grad():
          out = self(x, y)
          loss = out['bbox_regression'] + out['classification']
          
        return {'val_loss': loss}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        self.log('val_loss', avg_loss) 
        return avg_loss

    def configure_optimizers(self):
        
        opt = torch.optim.SGD(self.parameters(), self.lr, 0.9)
        sched = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(opt, 37*3) ## changed
        return [opt], [{                                                       ## changed
                'scheduler': sched,                                            ## changed
                'interval': 'step', # The unit of the scheduler's step size    ## changed
            }]
        
    def train_dataloader(self):
        ds = CardDS()
        return DataLoader(ds, 8, collate_fn=collate, shuffle=True)

    def val_dataloader(self):
        ds = CardDS(train=False)
        val_dl = DataLoader(ds, 8, collate_fn=collate)
        return val_dl
In [ ]:
net = Net()
Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth

Txfr learn

In [ ]:
net.freeze()

Find LR

In [ ]:
torch.cuda.is_available()
Out[ ]:
True
In [ ]:
trainer = pl.Trainer(gpus=1)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
In [ ]:
lrf = trainer.tuner.lr_find(net, num_training=50)  ## changed
fig = lrf.plot(suggest=True)      ## changed
fig.show() 
  | Name  | Type      | Params
------------------------------------
0 | model | RetinaNet | 32 M  
/usr/local/lib/python3.6/dist-packages/torch/nn/_reduction.py:44: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.
  warnings.warn(warning.format(ret))
/usr/local/lib/python3.6/dist-packages/pytorch_lightning/utilities/distributed.py:45: UserWarning: The validation_epoch_end should not return anything as of 9.1.to log, use self.log(...) or self.write(...) directly in the LightningModule
  warnings.warn(*args, **kwargs)

Train

In [ ]:
net.lr = 3e-2
In [ ]:
trainer = pl.Trainer(gpus=1, log_every_n_steps=1, callbacks=[pl.callbacks.LearningRateMonitor('step')])  ## changed
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
In [ ]:
trainer.max_epochs = 30
trainer.fit(net)
In [ ]:
trainer.save_checkpoint('txfr.pt', weights_only=True)

Fine Tune

In [ ]:
net = Net.load_from_checkpoint('txfr.pt')
In [ ]:
net.unfreeze()

Find LR

In [ ]:
trainer = pl.Trainer(gpus=1)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
In [ ]:
lrf = trainer.tuner.lr_find(net, num_training=50, max_lr=3e-2)  ## changed
fig = lrf.plot(suggest=True)      ## changed
fig.show() 
  | Name  | Type      | Params
------------------------------------
0 | model | RetinaNet | 32 M  
/usr/local/lib/python3.6/dist-packages/torch/nn/_reduction.py:44: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.
  warnings.warn(warning.format(ret))
/usr/local/lib/python3.6/dist-packages/pytorch_lightning/utilities/distributed.py:45: UserWarning: The validation_epoch_end should not return anything as of 9.1.to log, use self.log(...) or self.write(...) directly in the LightningModule
  warnings.warn(*args, **kwargs)
In [ ]:
fig.gca().set_ylim(1.0, 1.5)
fig

Train

In [ ]:
net.lr = 3e-3
In [ ]:
trainer = pl.Trainer(gpus=1, log_every_n_steps=1, callbacks=[pl.callbacks.LearningRateMonitor('step')])
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
In [ ]:
trainer.max_epochs = 25
trainer.fit(net)

Test

In [ ]:
%%capture
net.eval()
net.cuda()
In [ ]:
df = pd.read_csv('test_labels.csv')
In [ ]:
idx = np.random.randint(len(df))
In [ ]:
img = Image.open(f'test/{df.filename.loc[idx]}')
In [ ]:
img = img.resize((456, 456))
img
Out[ ]:
In [ ]:
inp = V.transforms.functional.to_tensor(img)
In [ ]:
inp.shape
Out[ ]:
torch.Size([3, 456, 456])
In [ ]:
with torch.no_grad():
  out = net.model([inp.cuda()])
In [ ]:
outidx = V.ops.nms(out[0]['boxes'], out[0]['scores'], 0.25)
In [ ]:
outidx
Out[ ]:
tensor([106,  39,  40, 109, 157, 118, 168, 169, 170, 180,  71],
       device='cuda:0')
In [ ]:
out[0]['scores'][outidx][:15]
Out[ ]:
tensor([0.3338, 0.3089, 0.2794, 0.2726, 0.1045, 0.0925, 0.0765, 0.0742, 0.0721,
        0.0613, 0.0554], device='cuda:0')
In [ ]:
out[0]['labels'][outidx][:15]
Out[ ]:
tensor([4, 2, 2, 4, 5, 4, 5, 5, 5, 5, 2], device='cuda:0')
In [ ]:
draw = ImageDraw.Draw(img)
In [ ]:
for row in out[0]['boxes'][outidx][:4]:
  draw.rectangle(list(row.cpu().numpy()), outline='red')
In [ ]:
img
Out[ ]:
In [ ]:
classes
Out[ ]:
{'ace': 5, 'jack': 4, 'king': 3, 'nine': 2, 'queen': 6, 'ten': 1}

Visualize

In [ ]:
%load_ext tensorboard
%tensorboard --logdir ./lightning_logs
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
In [ ]: