Fully Convolutional Network in Chainer

Last updated: 2017-03-31 21:14

import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions
from chainer.links import VGG16Layers
from chainer.links.model.vision.vgg import prepare
from PIL import Image
import numpy as np
import copy
import argparse
import os

def load_image(path):
    img_org = Image.open(path)
    w, h = img_org.size
    # Resize width and height down to multiples of 32 so that the 32x
    # upsampling at the end of the network restores the input size exactly.
    img = img_org.resize(((w//32)*32, (h//32)*32))
    img = np.array(img, dtype=np.float32)
    # prepare() converts RGB to BGR, subtracts the VGG mean and transposes to CHW.
    x = prepare(img, size=None)
    x = np.expand_dims(x, axis=0)
    return x

def load_label(path):
    img_org = Image.open(path)
    w, h = img_org.size
    img = img_org.resize(((w//32)*32, (h//32)*32))
    y = np.array(img, dtype=np.int32)
    # VOC label images mark object borders with 255; map them to -1 so that
    # softmax_cross_entropy ignores those pixels.
    y[y==255] = -1
    y = np.expand_dims(y, axis=0)
    return y
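
As a quick sanity check, the two loaders should give a (1, 3, H, W) float32 array and a (1, H, W) int32 label map whose void pixels are -1. A minimal sketch, with hypothetical paths into a PASCAL VOC 2012 checkout:

# Hypothetical paths; any JPEGImages / SegmentationClass pair works.
x = load_image('VOC2012/JPEGImages/2007_000033.jpg')
y = load_label('VOC2012/SegmentationClass/2007_000033.png')
print(x.shape, x.dtype)  # (1, 3, H, W) float32, BGR and mean-subtracted by prepare()
print(y.shape, y.dtype)  # (1, H, W) int32, border pixels (255) remapped to -1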

def make_bilinear_interpolation_kernel(in_channels, out_channels, ksize):
    # Initial weight for a Deconvolution2D layer so that it performs bilinear
    # upsampling, following the kernel construction of the original FCN code.
    factor = (ksize + 1) // 2
    if ksize % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5
    og = np.ogrid[:ksize, :ksize]
    k = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
    w = np.zeros((out_channels, in_channels, ksize, ksize)).astype(np.float32)
    # Put the 2-D kernel on the channel diagonal so that every class channel
    # is upsampled independently (assumes in_channels == out_channels).
    w[range(out_channels), range(in_channels), :, :] = k
    return w
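
This is the standard bilinear-upsampling initializer: the 1-D weight profile for ksize=4 is [0.25, 0.75, 0.75, 0.25], and the 2-D kernel is its outer product. A small check:

w = make_bilinear_interpolation_kernel(1, 1, 4)
print(w[0, 0])
# [[0.0625 0.1875 0.1875 0.0625]
#  [0.1875 0.5625 0.5625 0.1875]
#  [0.1875 0.5625 0.5625 0.1875]
#  [0.0625 0.1875 0.1875 0.0625]]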

class fcn32s(chainer.Chain):
    insize = 227
    nb_classes = 21  # 20 PASCAL VOC classes + background

    def __init__(self):
        # The pretrained VGG16 is kept as a plain attribute rather than a
        # registered child link, so its weights are not touched by the
        # optimizer; only the two layers below are trained.
        self.vgg16 = VGG16Layers()
        w = make_bilinear_interpolation_kernel(self.nb_classes, self.nb_classes, 64)
        super(fcn32s, self).__init__(
            # 1x1 convolution mapping pool5 features to per-class scores,
            # zero-initialized as in the original FCN.
            score_pool5=L.Convolution2D(512, self.nb_classes, 1, stride=1, pad=0,
                nobias=True, initialW=chainer.initializers.Zero()),
            # 32x upsampling back to the input resolution, initialized to
            # bilinear interpolation.
            interpolation=L.Deconvolution2D(self.nb_classes, self.nb_classes,
                ksize=64, stride=32, pad=16, initialW=w),
        )
        self.train = True

    def __call__(self, x, t=None):
        feature = self.vgg16(x, layers=['pool5'])
        h = self.score_pool5(feature['pool5'])
        h = self.interpolation(h)
        if self.train:
            loss = F.softmax_cross_entropy(h, t)
            return loss
        else:
            pred = F.softmax(h)
            return pred
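
With ksize=64, stride=32, pad=16 the deconvolution output size is 32*(n-1) + 64 - 2*16 = 32*n, so the score map comes back at exactly the resized input resolution. A quick shape check on a dummy input (instantiating VGG16Layers downloads the pretrained weights on first use):

model = fcn32s()
model.train = False
x = np.zeros((1, 3, 320, 480), dtype=np.float32)
pred = model(x)          # inference path: softmax over the 21 class scores
print(pred.data.shape)   # (1, 21, 320, 480)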

def model_predict(model, input_path, output_path):
    x = load_image(input_path)
    model.train = False
    pred = model(x).data
    pred = pred[0].argmax(axis=0).astype(np.uint8)
    img = Image.fromarray(pred, mode='P')
    # Borrow the VOC color palette from an existing indexed label image.
    palette_im = Image.open('image.png')
    img.putpalette(palette_im.getpalette())
    img.save(output_path)
    model.train = True

parser = argparse.ArgumentParser()
parser.add_argument('train_data')
parser.add_argument('image_dir')
parser.add_argument('label_dir')
parser.add_argument('--gpu', '-g', type=int, default=-1,
                    help='GPU ID (negative value indicates CPU)')
args = parser.parse_args()

with open(args.train_data) as f:
    nb_data = sum(1 for line in f)
model = fcn32s()
if args.gpu >= 0:
    chainer.cuda.get_device(args.gpu).use()
    model.to_gpu()
    # vgg16 is not a registered child link, so it has to be moved explicitly.
    model.vgg16.to_gpu()
optimizer = optimizers.Adam()
optimizer.use_cleargrads()
optimizer.setup(model)
for i in range(100):
    loss_epoch = 0
    with open(args.train_data) as f:
        for line in f:
            filename = line.rstrip('\n')
            path_image = os.path.join(args.image_dir, filename+'.jpg')
            path_label = os.path.join(args.label_dir, filename+'.png')
            x = load_image(path_image)
            y = load_label(path_label)
            if args.gpu >= 0:
                # The inputs must live on the same device as the model.
                x = cuda.to_gpu(x)
                y = cuda.to_gpu(y)
            model.cleargrads()
            loss = model(x, y)
            loss.backward()
            optimizer.update()
            loss_epoch += float(loss.data)
    print(loss_epoch/nb_data)
    serializers.save_npz("model-{}.npz".format(i), model)
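
model_predict above is never called by the training script; after training, a saved snapshot can be reloaded and applied to a new image roughly as follows (the paths are hypothetical, and 'image.png' hard-coded in model_predict must be an existing VOC-style indexed image whose palette is reused for the output):

model = fcn32s()
serializers.load_npz('model-99.npz', model)  # only score_pool5 / interpolation weights are stored
model_predict(model, 'VOC2012/JPEGImages/2007_000042.jpg', 'pred.png')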