PyTorch Notes

1 Tensor

Example

import torch
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose(
    [transforms.ToTensor(), # converts a PIL image to a tensor and scales pixel values to [0, 1] (divides by 255)
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), # normalized_image = (image - mean) / std, mapping [0, 1] to [-1, 1]
     # transforms.RandomVerticalFlip(p=0.5)
    ]
)
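
# worked example of the two transforms above: a pixel value of 255 becomes 1.0 after ToTensor,
# then (1.0 - 0.5) / 0.5 = 1.0 after Normalize; a pixel value of 0 becomes (0.0 - 0.5) / 0.5 = -1.0,
# so every channel ends up in the range [-1, 1]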

# trainset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2) # reshuffle every epoch; num_workers = number of loader worker processes

# testset
testset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

testloader = torch.utils.data.DataLoader(testset, batch_size=4,shuffle=False, num_workers=2)

# load a custom (private) dataset
# privateset = torchvision.datasets.ImageFolder(root=image_path, transform=transform)
# pri_dataloader = torch.utils.data.DataLoader(privateset, batch_size=4, shuffle=False, num_workers=2)

import matplotlib.pyplot as plt
import numpy as np


# def imshow(img):
#     # input data: torch.Tensor of shape [c, h, w]
#     img = img / 2 + 0.5 # unnormalize back to [0, 1]
#     npimg = img.numpy()
#     npimg = np.transpose(npimg, (1, 2, 0)) # [h, w, c] for matplotlib
#     plt.imshow(npimg)

# dataiter = iter(trainloader) # load one mini-batch
# images, labels = next(dataiter)

# imshow(torchvision.utils.make_grid(images))

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    '''
    define the network structure; input data: 3 x 32 x 32 (channels x height x width)
    '''
    def __init__(self):
        super(Net, self).__init__()
        # ConvNet_1
        self.conv1 = nn.Conv2d(3, 6, 3) # (input channel, output channel, kernel size)
        # ConvNet_2
        self.conv2 = nn.Conv2d(6, 16, 3) # kernel size = 3 x 3
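        # with a 32x32 CIFAR-10 input and no padding, the spatial size shrinks 32 -> 30 after conv1
        # and 30 -> 28 after conv2, so the flattened size below is 16 * 28 * 28 (no pooling is used)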
        # FC_1
        self.fc1 = nn.Linear(16*28*28, 512) # input-scale:12544(16*28*28)
        # FC_2
        self.fc2 = nn.Linear(512, 64)
        # FC_3
        self.fc3 = nn.Linear(64, 10) # output-scale:10

    def forward(self, x):
        """
        define the forward data flow
        :param x: input
        :return: the final x
        """
        x = self.conv1(x)
        x = F.relu(x)

        x = self.conv2(x)
        x = F.relu(x)

        # flatten before the fully connected layers
        x = x.view(-1, 16*28*28)
        x = self.fc1(x)
        x = F.relu(x)

        x = self.fc2(x)
        x = F.relu(x)

        x = self.fc3(x)

        return x

if __name__ == '__main__':
    net = Net()
    print(net)
    import torch.optim as optim

    # torch.optim holds the update rules (optimizers) used to adjust the network weights
    criterion = nn.CrossEntropyLoss()  # cross-entropy loss
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    # optionally record the training / test loss history
    # train_loss_hist = []
    # test_loss_hist = []
    for epoch in range(20):
        # use mini batch
        for i, data in enumerate(trainloader):  # data: images + labels
            images, labels = data

            outputs = net(images)

            loss = criterion(outputs, labels)

            # update weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # show loss
            if (i % 1000 == 0): # mini-batch size is set by trainloader | print once every 1000 mini-batches
                print('Epoch: %d, Step: %d, Loss: %.3f' % (epoch, i, loss.item())) # loss: loss.item()

    # test model
    correct = 0.0
    total = 0.0

    with torch.no_grad():
        for data in testloader:
            images, labels = data # one mini-batch

            outputs = net(images)

            # torch.max: https://blog.csdn.net/Z_lbj/article/details/79766690 
            _, predicted = torch.max(outputs.data, 1) # torch.max(a, 1) returns the maximum element of each row together with its column index (one per row)
            correct += (predicted == labels).sum() # number of correct predictions in this mini-batch
            total += labels.size(0) # batch size, like np.shape[0]

    print('correct: ', float(correct/total))
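
A possible follow-up (not part of the original script): the trained weights can be saved and restored through the state_dict; the file name 'cifar_net.pth' is just an illustrative choice.

# save the learned weights (file name is hypothetical)
torch.save(net.state_dict(), 'cifar_net.pth')

# later: rebuild the architecture and load the weights back
net2 = Net()
net2.load_state_dict(torch.load('cifar_net.pth'))
net2.eval()  # switch to evaluation mode before inference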

Use PyTorch like NumPy

PyTorch provides GPU-accelerated tensors and dynamically built networks.

The main component is the Tensor.

import torch
import numpy as np
# CREATE a NUMPY ndarray
numpy_tensor = np.random.randn(10, 20)

Returns: ndarray or float

A (d0, d1, ..., dn)-shaped array of floating-point samples from the standard normal distribution, or a single such float if no parameters were supplied.

1.1 CONVERT NUMPY to TENSOR

# CONVERT NUMPY to TENSOR
pytorch_tensor1 = torch.Tensor(numpy_tensor)
pytorch_tensor2 = torch.from_numpy(numpy_tensor)
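
A small difference worth noting: torch.Tensor() copies the data and casts it to float32, while torch.from_numpy() shares memory with the ndarray and keeps its dtype (float64 here).

print(pytorch_tensor1.type())  # torch.FloatTensor  (copied, float32)
print(pytorch_tensor2.type())  # torch.DoubleTensor (shares memory with numpy_tensor)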

1.2 CONVERT TENSOR to NUMPY

# CONVERT TENSOR to NUMPY
# to get a NumPy array, always call .numpy()
# IF cpu
numpy_array = pytorch_tensor1.numpy()
# IF gpu
numpy_array = pytorch_tensor2.cpu().numpy()

Note

A Tensor on the GPU cannot be converted directly to a NumPy ndarray; call .cpu() first to move the Tensor from the GPU to the CPU.

1.3 Tensor on GPU

put Tensor on GPU

# VERSION 1: define the CUDA dtype first, then cast to it [default]
dtype = torch.cuda.FloatTensor 
gpu_tensor = torch.randn(10, 20).type(dtype)
# VERSION 2: SIMPLE & POPULAR
gpu_tensor = torch.randn(10, 20).cuda(0) # tensor on GPU 0
gpu_tensor = torch.randn(10, 20).cuda(1) # tensor on GPU 1

fetch the tensor back to the CPU

# fetch back on cpu
cpu_tensor = gpu_tensor.cpu()
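
As an aside (not in the original notes), the same moves can be written in the device-agnostic style with torch.device and .to():

# a minimal sketch of the device-agnostic style
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
gpu_tensor = torch.randn(10, 20).to(device) # moves to the GPU only if one is available
cpu_tensor = gpu_tensor.to('cpu')           # bring it back to the CPU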

1.4 Tensor Attribute

Size

print(pytorch_tensor1.shape)  # Attention: .shape and .size() give the same result
print(pytorch_tensor2.size())

>>> torch.Size([10, 20])
    torch.Size([10, 20])

Type

print(pytorch_tensor1.type()) 

>>> torch.FloatTensor

Dimension

print(pytorch_tensor1.dim())

>>> 2

Number of elements

print(pytorch_tensor1.numel())

>>> 200

Try

tensor_init = torch.randn((3, 2))
tensor = tensor_init.type(torch.DoubleTensor)
x_array = tensor.numpy()
print(x_array.dtype)

>>> float64

1.5 Tensor Operation

Just like NumPy

torch.ones

x = torch.ones(2, 2)
print(x) # float tensor

>>>  
tensor([[1., 1.],
        [1., 1.]])

type()

print(x.type())

>>> torch.FloatTensor

long() torch.LongTensor

x = x.long()
# x = x.type(torch.LongTensor)
print(x)

>>>
tensor([[1, 1],
        [1, 1]])

float() torch.FloatTensor

x = x.float()
# x = x.type(torch.FloatTensor)
print(x)

torch.randn(a, b)

x = torch.randn(4, 3)
print(x)

>>>
tensor([[ 0.3291,  2.2839, -0.2401],
        [ 0.5324,  0.9681,  0.2163],
        [ 0.6263, -0.3329,  1.6206],
        [ 0.5429, -1.8231, -1.1917]])

torch.max

max_value, max_idx = torch.max(x, dim=1)
print(max_value)
print(max_idx)

>>>
tensor([2.2839, 0.9681, 1.6206, 0.5429])
tensor([1, 1, 2, 0])

dim = 1: the maximum over each row

dim = 0: the maximum over each column
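
For comparison, a sketch of the column-wise case (shapes shown in comments; values depend on x):

col_max_value, col_max_idx = torch.max(x, dim=0) # maximum of each column
print(col_max_value.shape) # torch.Size([3]) -- one value per column
print(col_max_idx.shape)   # torch.Size([3]) -- row index of each column maximum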

torch.sum

sum_x = torch.sum(x, dim=1)
print(sum_x)

>>>
tensor([ 2.3730,  1.7168,  1.9140, -2.4719])

torch.unsqueeze

x.unsqueeze(i) # insert a new dimension of size 1 at position i

x.squeeze(i) # remove dimension i if it has size 1

x = x.squeeze() # every dimension of size 1 is completely removed

tensor([[-0.0255,  1.3384,  0.5698],
        [ 0.5936, -0.1986,  1.3338],
        [-1.6849,  0.3457,  1.9582],
        [ 1.0653, -0.9994,  0.0824]])
print(x.shape) # torch.Size([4, 3])
x = x.unsqueeze(0) # insert a new dim at position 0
# torch.Size([1, 4, 3])
print(x)

>>>
tensor([[[-0.0255,  1.3384,  0.5698],
         [ 0.5936, -0.1986,  1.3338],
         [-1.6849,  0.3457,  1.9582],
         [ 1.0653, -0.9994,  0.0824]]])
x = x.unsqueeze(1) # insert a new dim at position 1
# torch.Size([1, 1, 4, 3])

permute & transpose

permute can rearrange all the dimensions of a tensor

transpose exchanges two dimensions of a tensor

x = torch.randn(3, 4, 5) # torch.Size([3, 4, 5])

# Dimensional exchange
x = x.permute(1, 0, 2) # torch.Size([4, 3, 5])

x = x.transpose(0, 2) # torch.Size([5, 3, 4])

view

Use view to reshape a Tensor

x = torch.randn(3, 4, 5)
print(x.shape)

>>>
torch.Size([3, 4, 5])

x = x.view(-1, 5) # torch.Size([12, 5])

x = x.view(3, 20) # torch.Size([3, 20])
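
Two related points worth noting (my addition): -1 lets view infer one dimension, and view only works on contiguous tensors; reshape (or .contiguous().view(...)) covers the non-contiguous case.

x = torch.randn(3, 4, 5)
y = x.view(3, -1)        # the -1 is inferred as 20
z = x.transpose(0, 2)    # transposing makes the tensor non-contiguous
# z.view(20, 3) would typically raise an error here, so use one of these instead:
z1 = z.reshape(20, 3)
z2 = z.contiguous().view(20, 3)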

add

torch.add(x, y)

x = torch.randn(3, 4)
y = torch.randn(3, 4)

# add two Tensor
z = x + y
# z = torch.add(x, y)

inplace

Review the previous

print(x.shape) # torch.Size([4, 3])
x = x.unsqueeze(0) # the 1st dim plus 
# torch.Size([1, 4, 3])
x = torch.ones(3, 3) # torch.Size([3, 3])

# in-place version of unsqueeze
x.unsqueeze_(0) # x = x.unsqueeze(0) torch.Size([1, 3, 3])

# in-place version of transpose
x.transpose_(1, 0) # torch.Size([3, 1, 3])

X = torch.ones(3, 3)
Y = torch.ones(3, 3)
X.add_(Y) # X = X + Y || X = torch.add(X, Y)

Try

Create a float32, 4 x 4 all-ones matrix and set the 2 x 2 block in the middle to 2

$$\left[ \begin{array} { l l l l } { 1 } & { 1 } & { 1 } & { 1 } \\ { 1 } & { 2 } & { 2 } & { 1 } \\ { 1 } & { 2 } & { 2 } & { 1 } \\ { 1 } & { 1 } & { 1 } & { 1 } \end{array} \right]$$
x = torch.ones(4, 4)
x[1:3, 1:3] = 2

2 Variable

The Tensor is a fine building block in PyTorch, but it is not enough for building neural networks. We need a tensor type that can take part in a computational graph: the Variable. A Variable wraps a tensor and supports the same operations, but it carries three attributes: .data, the wrapped tensor itself; .grad, the gradient with respect to that tensor; and .grad_fn, which records how the Variable was produced.

Variable-properties

import torch
import numpy as np
from torch.autograd import Variable
x_tensor = torch.randn(10, 5)
y_tensor = torch.randn(10, 5)

2.1 CONVERT TENSOR to Variable

By default a Variable does not require gradients, so we pass requires_grad=True to declare that we want the gradient computed.

# default variable does not need to be gradient, 
# so we use this method to declare that we need to find the gradient
x = Variable(x_tensor, requires_grad = True)
y = Variable(y_tensor, requires_grad = True)

z = torch.sum(x + y) # Attention: element-wise addition, then summed to a scalar
print('z', z)
# Variable.data
print('\nz.data', z.data)
# Variable.grad_fn
print('\nz.grad_fn', z.grad_fn)
z tensor(-0.2191, grad_fn=<SumBackward0>)

z.data tensor(-0.2191)

z.grad_fn <SumBackward0 object at 0x7f5e8a084d30>

2.2 backward: automatic differentiation

# Find the gradients of x and y
z.backward()

print(x.grad)
print(y.grad)
# Using the automatic differentiation mechanism provided by PyTorch
tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])
tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])
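
The all-ones gradients follow directly from the definition of z: since $$z = \sum_{i,j} ( x_{ij} + y_{ij} )$$, every partial derivative is $$\frac{ \partial z } { \partial x_{ij} } = \frac{ \partial z } { \partial y_{ij} } = 1$$, so both gradients are 10 x 5 matrices of ones.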

2.3 Try

Try to construct the function $$y = x ^ { 2 }$$ and then find its derivative at x = 2

import matplotlib.pyplot as plt
x = np.arange(-3, 3.01, 0.1) # np.arange(BEGIN, END, STEP)
y = x**2
plt.plot(x, y)
plt.plot(2, 4, 'ro') # 'red' & 'o'
[<matplotlib.lines.Line2D at 0x7f5e8495afd0>]

(figure: plot of y = x² with the point (2, 4) marked in red)

x = Variable(torch.FloatTensor([2]), requires_grad=True)
y = x ** 2
y.backward()
print(x.grad)
tensor([4.])
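
As a check, the analytic derivative is $$\frac{ d y } { d x } = 2 x = 4$$ at x = 2, which matches the printed gradient.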

2.4 Appendix - view

b = torch.arange(4 * 5 * 6).view(4, 5, 6)
b
tensor([[[  0,   1,   2,   3,   4,   5],
         [  6,   7,   8,   9,  10,  11],
         [ 12,  13,  14,  15,  16,  17],
         [ 18,  19,  20,  21,  22,  23],
         [ 24,  25,  26,  27,  28,  29]],

        [[ 30,  31,  32,  33,  34,  35],
         [ 36,  37,  38,  39,  40,  41],
         [ 42,  43,  44,  45,  46,  47],
         [ 48,  49,  50,  51,  52,  53],
         [ 54,  55,  56,  57,  58,  59]],

        [[ 60,  61,  62,  63,  64,  65],
         [ 66,  67,  68,  69,  70,  71],
         [ 72,  73,  74,  75,  76,  77],
         [ 78,  79,  80,  81,  82,  83],
         [ 84,  85,  86,  87,  88,  89]],

        [[ 90,  91,  92,  93,  94,  95],
         [ 96,  97,  98,  99, 100, 101],
         [102, 103, 104, 105, 106, 107],
         [108, 109, 110, 111, 112, 113],
         [114, 115, 116, 117, 118, 119]]])

3 Automatic Differentiation

Automatic differentiation is a very important feature of PyTorch. It lets us avoid computing complex derivatives by hand, which greatly reduces the time needed to build a model, and it is a capability that its predecessor, the Torch framework, did not have.

import torch
# torch.autograd
from torch.autograd import Variable

3.1 Simple Cases

"Simple" means the result of the computation is a scalar, i.e. a single number, and we automatically differentiate with respect to the inputs of that scalar.

Verify Automatic Differentiation

x = Variable(torch.Tensor([2]), requires_grad=True)
y = x + 2
z = y ** 2 + 3
print(z)
tensor([19.], grad_fn=<AddBackward0>)

Through the series of operations above we get the final result z, which we can write as the formula $$z = ( x + 2 ) ^ { 2 } + 3$$
The derivative of z with respect to x is then $$\frac{ \partial z } { \partial x } = 2 ( x + 2 ) = 2 ( 2 + 2 ) = 8$$

# Use automatic differentiation
z.backward()
print(x.grad) # calling backward() on z fills in x.grad
tensor([8.])

Convenient, right?

A more complicated example:

x = Variable(torch.randn(10, 20), requires_grad=True)
y = Variable(torch.randn(10, 5), requires_grad=True)
w = Variable(torch.randn(20, 5), requires_grad=True)

out = torch.mean(y - torch.matmul(x, w)) # torch.matmul -- matrix multiplication
out.backward()
# get the gradients of x, y, and w
print('x.grad', x.grad)
print('\ny.grad', y.grad)
print('\nw.grad', w.grad)
x.grad tensor([[ 0.0633,  0.0355, -0.0060, -0.0336, -0.0119,  0.0798,  0.0388,  0.0087,
         -0.0505, -0.0557,  0.0231, -0.0929,  0.0838, -0.0613, -0.0386, -0.0656,
         -0.0167, -0.0023,  0.0108, -0.0152],
        [ 0.0633,  0.0355, -0.0060, -0.0336, -0.0119,  0.0798,  0.0388,  0.0087,
         -0.0505, -0.0557,  0.0231, -0.0929,  0.0838, -0.0613, -0.0386, -0.0656,
         -0.0167, -0.0023,  0.0108, -0.0152],
        [ 0.0633,  0.0355, -0.0060, -0.0336, -0.0119,  0.0798,  0.0388,  0.0087,
         -0.0505, -0.0557,  0.0231, -0.0929,  0.0838, -0.0613, -0.0386, -0.0656,
         -0.0167, -0.0023,  0.0108, -0.0152],
        [ 0.0633,  0.0355, -0.0060, -0.0336, -0.0119,  0.0798,  0.0388,  0.0087,
         -0.0505, -0.0557,  0.0231, -0.0929,  0.0838, -0.0613, -0.0386, -0.0656,
         -0.0167, -0.0023,  0.0108, -0.0152],
        [ 0.0633,  0.0355, -0.0060, -0.0336, -0.0119,  0.0798,  0.0388,  0.0087,
         -0.0505, -0.0557,  0.0231, -0.0929,  0.0838, -0.0613, -0.0386, -0.0656,
         -0.0167, -0.0023,  0.0108, -0.0152],
        [ 0.0633,  0.0355, -0.0060, -0.0336, -0.0119,  0.0798,  0.0388,  0.0087,
         -0.0505, -0.0557,  0.0231, -0.0929,  0.0838, -0.0613, -0.03
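
Why is every row of x.grad identical? A short derivation (my addition): with $$\mathrm{out} = \frac{1}{50} \sum_{i,k} \left( y_{ik} - \sum_{j} x_{ij} w_{jk} \right)$$ over the 10 x 5 output, the partial derivative $$\frac{ \partial \, \mathrm{out} } { \partial x_{ij} } = - \frac{1}{50} \sum_{k} w_{jk}$$ does not depend on the row index i, so all 10 rows of x.grad are the same; similarly every entry of y.grad equals 1/50 = 0.02.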
