An autoencoder is a neural network trained to learn efficient, compact representations of input data. It compresses (encodes) the input, then reconstructs (decodes) the original input from that compressed representation. The autoencoder is trained to minimize reconstruction error, using the original input itself as the ground truth.
Autoencoder architectures typically introduce some form of bottleneck between the encoder and the decoder:
As data traverses the encoder network, each layer's capacity is progressively reduced. This forces the network to retain only the most important patterns hidden in the input data, known as the latent variables, the latent space, or the bottleneck.
The bottleneck, located right after the encoder, serves as an extra layer that compresses the extracted features into a smaller vector representation. The purpose is to force the decoder to learn more complex mappings so that it can accurately reconstruct the original input despite now having less information.
A convolutional autoencoder is designed specifically for images and other data with spatial structure, and is trained through unsupervised learning. It reduces the size of images for storage or transmission without losing important details.
The architecture of a convolutional autoencoder has a symmetric encoder-decoder structure. The encoder and decoder are built using convolutional neural networks (CNNs), which are well-suited for processing spatial data. The convolution layers replace the fully connected layers in a typical neural network in order to capture spatial hierarchies in data more effectively and scale better with larger input dimensions, particularly useful for images.
The early convolutional layers capture simple patterns such as edges and colors in an image. As data flows through deeper layers, the model identifies more complex features, such as shapes, textures, and even entire objects. Each convolutional layer builds on the patterns detected by the previous one, creating a rich yet compressed feature representation of the image. This dimensionality reduction is non-linear, unlike traditional PCA (Principal Component Analysis), which is constrained to linear transformations.
By the time the data reaches the last layer of an encoder, it is transformed from a 2D image into a compact 1D vector that captures the most important information. The smaller the vector representation passed to the decoder, the fewer image features the decoder has access to and the less detailed its reconstructions will be.
For an input image of size WxW, a filter kernel of size KxK, padding P, and stride S, the output edge length is
\begin{equation} Out=\left\lfloor\frac{W-K+2 \times P}{S}\right\rfloor+1 \end{equation}
Autoencoders can learn efficient data representations that minimize reconstruction error with fewer bits, enabling lossy image compression. Lossy image compression is a technique that reduces an image's file size by permanently removing some of its less important data, resulting in a smaller file that can be stored, transferred, and loaded faster.
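The standard convolution output-size rule can be checked with a small helper function; this is a sketch (the function name conv_out_size is chosen here for illustration, not taken from the project code):

```python
def conv_out_size(w, k, p, s):
    """Output edge length of a square convolution: floor((W - K + 2P) / S) + 1."""
    return (w - k + 2 * p) // s + 1

# 96x96 input, 3x3 kernel, padding 1, stride 2 -> 48
print(conv_out_size(96, 3, 1, 2))  # 48
print(conv_out_size(48, 3, 1, 2))  # 24
print(conv_out_size(24, 3, 1, 2))  # 12
```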
The purpose of compressing images is to reduce storage requirements and allow images to be transferred and loaded faster.
Autoencoders with a significant dimensionality reduction in the latent space can lose fine details and high-frequency information during encoding. This information may not be recoverable during decoding, leading to a lossy compression that impacts image quality in applications requiring high fidelity.
Autoencoders can be sensitive to noise or variations in the input data that differ from the training set, potentially affecting their compression and reconstruction performance. Adjusting the latent space size can minimize the irrelevant noise in the input data.
The effectiveness of an autoencoder for image compression is heavily reliant on the quality and quantity of the training data. With the possibility of overfitting, the autoencoder model could fail to reconstruct unseen images.
Training deep autoencoders, especially with large datasets and complex architectures, can be computationally expensive and require significant resources. Adjusting the size of latent space requires a balance between computational cost and loss of fine details.
This project applies an autoencoder consisting of an encoder with three halvings of the image size followed by a linear layer that outputs a latent space of 200 elements, and a decoder that symmetrically reverses the layers back to the original image size. See the figures below. For example, the input image is 96x96 pixels with 3 color channels. Applying a filter kernel of size 3x3, padding of 1, and stride of 2, the output image edge is as follows.
\begin{equation} Out=\left\lfloor\frac{W-K+2 \times P}{S}\right\rfloor+1=\left\lfloor\frac{96-3+2 \times 1}{2}\right\rfloor+1=48 \end{equation} The reduction factor from 96 to 48 is 2. Applying the reduction 3 times, the image edge before the flatten layer becomes 96/2/2/2 = 12.
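This chain of reductions can be verified directly in PyTorch by stacking three stride-2 convolutions with the stated kernel and padding; the channel counts (48, 96, 192) follow the encoder described below:

```python
import torch
import torch.nn as nn

# Three stride-2, 3x3, padding-1 convolutions: 96 -> 48 -> 24 -> 12
net = nn.Sequential(
    nn.Conv2d(3, 48, 3, padding=1, stride=2),    # (96, 96) -> (48, 48)
    nn.Conv2d(48, 96, 3, padding=1, stride=2),   # (48, 48) -> (24, 24)
    nn.Conv2d(96, 192, 3, padding=1, stride=2),  # (24, 24) -> (12, 12)
)
x = torch.randn(1, 3, 96, 96)
print(net(x).shape)  # torch.Size([1, 192, 12, 12])
```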
The dataset is STL10, an image recognition dataset for developing unsupervised feature learning, deep learning, and self-taught learning algorithms. There are 10 classes of objects, with 500 training and 800 test images per class.
References:
STL10 source: https://cs.stanford.edu/~acoates/stl10/
PyTorch convolution layers: https://docs.pytorch.org/docs/stable/nn.html#convolution-layers
imgEdge=96       # input image edge length (96x96 pixels)
QtyColor=3       # number of color channels
outWtSize=48     # base channel count of the first convolution layer
QtyFold=3        # number of stride-2 reduction stages
StepSize=2       # stride of each reduction
TotalReduction=int(StepSize**QtyFold)    # 2^3 = 8
HiddenSize=int(imgEdge/TotalReduction)   # 96/8 = 12, edge length before flattening
# Defining the Encoder
import torch
import torch.nn as nn

class Encoder(nn.Module):
    def __init__(self, in_channels=3, out_channels=outWtSize, latent_dim=200, act_fn=nn.ReLU()):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),  # (96, 96)
            act_fn,
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            act_fn,
            nn.Conv2d(out_channels, 2 * out_channels, 3, padding=1, stride=2),  # (48, 48)
            act_fn,
            nn.Conv2d(2 * out_channels, 2 * out_channels, 3, padding=1),
            act_fn,
            nn.Conv2d(2 * out_channels, 4 * out_channels, 3, padding=1, stride=2),  # (24, 24)
            act_fn,
            nn.Conv2d(4 * out_channels, 4 * out_channels, 3, padding=1),
            act_fn,
            nn.Conv2d(4 * out_channels, 8 * out_channels, 3, padding=1, stride=2),  # (12, 12)
            act_fn,
            nn.Conv2d(8 * out_channels, 8 * out_channels, 3, padding=1),
            act_fn,
            nn.Flatten(),
            nn.Linear(TotalReduction * out_channels * HiddenSize * HiddenSize, latent_dim),
            act_fn
        )

    def forward(self, x):
        x = x.view(-1, QtyColor, imgEdge, imgEdge)
        output = self.net(x)
        return output
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 48, 96, 96] 1,344
ReLU-2 [-1, 48, 96, 96] 0
Conv2d-3 [-1, 48, 96, 96] 20,784
ReLU-4 [-1, 48, 96, 96] 0
Conv2d-5 [-1, 96, 48, 48] 41,568
ReLU-6 [-1, 96, 48, 48] 0
Conv2d-7 [-1, 96, 48, 48] 83,040
ReLU-8 [-1, 96, 48, 48] 0
Conv2d-9 [-1, 192, 24, 24] 166,080
ReLU-10 [-1, 192, 24, 24] 0
Conv2d-11 [-1, 192, 24, 24] 331,968
ReLU-12 [-1, 192, 24, 24] 0
Conv2d-13 [-1, 384, 12, 12] 663,936
ReLU-14 [-1, 384, 12, 12] 0
Conv2d-15 [-1, 384, 12, 12] 1,327,488
ReLU-16 [-1, 384, 12, 12] 0
Flatten-17 [-1, 55296] 0
Linear-18 [-1, 200] 11,059,400
ReLU-19 [-1, 200] 0
================================================================
Total params: 13,695,608
Trainable params: 13,695,608
Non-trainable params: 0
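The Flatten-17 entry in the summary can be reproduced with a few lines of arithmetic: three stride-2 stages halve the edge three times, and the channel count grows by the same factor of 8 over the base of 48 channels.

```python
# Bookkeeping behind Flatten-17 in the encoder summary
imgEdge, outWtSize, QtyFold = 96, 48, 3
edge = imgEdge // (2 ** QtyFold)       # 96 / 8 = 12
channels = (2 ** QtyFold) * outWtSize  # 8 * 48 = 384
print(edge, channels, channels * edge * edge)  # 12 384 55296
```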
# Defining the Decoder
class Decoder(nn.Module):
    def __init__(self, in_channels=3, out_channels=outWtSize, latent_dim=200, act_fn=nn.ReLU()):
        super().__init__()
        self.out_channels = out_channels
        self.linear = nn.Sequential(
            nn.Linear(latent_dim, TotalReduction * out_channels * HiddenSize * HiddenSize),
            act_fn
        )
        self.conv = nn.Sequential(
            nn.ConvTranspose2d(8 * out_channels, 8 * out_channels, 3, padding=1),  # (12, 12)
            act_fn,
            nn.ConvTranspose2d(8 * out_channels, 4 * out_channels, 3, padding=1, stride=2, output_padding=1),  # (24, 24)
            act_fn,
            nn.ConvTranspose2d(4 * out_channels, 4 * out_channels, 3, padding=1),
            act_fn,
            nn.ConvTranspose2d(4 * out_channels, 2 * out_channels, 3, padding=1, stride=2, output_padding=1),  # (48, 48)
            act_fn,
            nn.ConvTranspose2d(2 * out_channels, 2 * out_channels, 3, padding=1),
            act_fn,
            nn.ConvTranspose2d(2 * out_channels, out_channels, 3, padding=1, stride=2, output_padding=1),  # (96, 96)
            act_fn,
            nn.ConvTranspose2d(out_channels, out_channels, 3, padding=1),
            act_fn,
            nn.ConvTranspose2d(out_channels, in_channels, 3, padding=1)
        )

    def forward(self, x):
        output = self.linear(x)
        output = output.view(-1, TotalReduction * self.out_channels, HiddenSize, HiddenSize)
        output = self.conv(output)
        return output
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Linear-1 [-1, 1, 55296] 11,114,496
ReLU-2 [-1, 1, 55296] 0
ReLU-3 [-1, 1, 55296] 0
ConvTranspose2d-4 [-1, 384, 12, 12] 1,327,488
ReLU-5 [-1, 384, 12, 12] 0
ReLU-6 [-1, 384, 12, 12] 0
ConvTranspose2d-7 [-1, 192, 24, 24] 663,744
ReLU-8 [-1, 192, 24, 24] 0
ReLU-9 [-1, 192, 24, 24] 0
ConvTranspose2d-10 [-1, 192, 24, 24] 331,968
ReLU-11 [-1, 192, 24, 24] 0
ReLU-12 [-1, 192, 24, 24] 0
ConvTranspose2d-13 [-1, 96, 48, 48] 165,984
ReLU-14 [-1, 96, 48, 48] 0
ReLU-15 [-1, 96, 48, 48] 0
ConvTranspose2d-16 [-1, 96, 48, 48] 83,040
ReLU-17 [-1, 96, 48, 48] 0
ReLU-18 [-1, 96, 48, 48] 0
ConvTranspose2d-19 [-1, 48, 96, 96] 41,520
ReLU-20 [-1, 48, 96, 96] 0
ReLU-21 [-1, 48, 96, 96] 0
ConvTranspose2d-22 [-1, 48, 96, 96] 20,784
ReLU-23 [-1, 48, 96, 96] 0
ReLU-24 [-1, 48, 96, 96] 0
ConvTranspose2d-25 [-1, 3, 96, 96] 1,299
================================================================
Total params: 13,750,323
Trainable params: 13,750,323
Non-trainable params: 0
----------------------------------------------------------------
# Defining the Autoencoder
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class Autoencoder(nn.Module):
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder.to(device)
        self.decoder = decoder.to(device)

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
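The "Param #" column in the summary tables above can be cross-checked with a small helper that sums trainable parameters. This is a sketch (the helper name count_params is chosen here for illustration); applied to a Linear(55296, 200) layer it reproduces the Linear-18 entry of the encoder summary:

```python
import torch.nn as nn

def count_params(m: nn.Module) -> int:
    """Sum of trainable parameter elements, matching torchsummary's Param # column."""
    return sum(p.numel() for p in m.parameters() if p.requires_grad)

# Weights plus biases of Linear(55296, 200): 55296 * 200 + 200
layer = nn.Linear(55296, 200)
print(count_params(layer))  # 11059400
```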
In this project, we will use torchvision utilities such as datasets and transforms to download the data and perform the data transformations.
import torchvision.transforms as transforms
import torchvision.datasets as Datasets
This project will also demonstrate the effect of normalization on the input data. Therefore, two types of transformation pipelines are used.
The following command is used to download the original training set of the STL10 datasets.
training_set = Datasets.STL10(root='./', split='train', download=True, transform=transforms.ToTensor())
In some cases, normalizing the input data can help training by rescaling each channel to a common, zero-centered range. With per-channel mean \(\mu\) and standard deviation \(\sigma\), each pixel value x is transformed as
\begin{equation} x'=\frac{x-\mu}{\sigma} \end{equation}
With mean 0.5 and standard deviation 0.5 for every channel, pixel values in [0, 1] are mapped to [-1, 1].
Normalization can also be detrimental for image processing: it shifts the image intensity range and can degrade perceived image quality. In this project, we will demonstrate the impact of normalization by observing the learning loss.
transform_pipeline = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
training_set = Datasets.STL10(root='./', split='train', download=True, transform=transform_pipeline)
References:
Torchvision Transforms: https://docs.pytorch.org/vision/0.9/transforms.html
Torchvision Datasets: https://docs.pytorch.org/vision/stable/datasets.html
There are 10 classes of images in STL10.
class_names=dict(zip(range(10), training_set.classes))
The order of color channels and pixel maps in an image tensor (3x96x96, channels first) differs from the layout expected for image display in Python (96x96x3, channels last). Therefore, the matrix must be reshaped in order to visualize an image. This step is only required for image visualization; it is not required during the machine learning process.
img = img.numpy().transpose((1, 2, 0))
The normalized image tends to appear darker because the original pixel map does not contain many bright cells (values toward 255); after normalization, the pixel values are centered toward darker values.
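To display a normalized image with its original brightness, the normalization can be undone before the channels-last transpose. A minimal sketch, assuming the mean/std of 0.5 used in the pipeline above:

```python
import numpy as np
import torch

# Undo Normalize(mean=0.5, std=0.5) before display: x = x_norm * std + mean
img = torch.tensor([-1.0, 0.0, 1.0]).view(3, 1, 1)  # one normalized pixel per channel
restored = img * 0.5 + 0.5                          # back to the [0, 1] display range
disp = restored.numpy().transpose((1, 2, 0))        # channels-last for plotting
print(disp.shape)  # (1, 1, 3)
```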
A DataLoader wraps an iterable around the Dataset to enable convenient access to the data. The Dataset retrieves a dataset's features and labels one sample at a time. While training a model, we typically want to pass samples in "minibatches" and reshuffle the data at every epoch to reduce model overfitting. There will be three dataloaders, one each for training, validation, and test.
from torch.utils.data import Dataset, DataLoader, TensorDataset, Subset
dataloader_tr=DataLoader(training_set, batch_size=64, shuffle=True)
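The validation and test dataloaders (dataloader_val, dataloader_te, used later in the training call) can be built by splitting the held-out images with random_split. A sketch with a tiny synthetic stand-in dataset so it runs offline; in the project, Datasets.STL10(root='./', split='test', ...) would be used instead:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split

# Tiny stand-in for the STL10 test split (shape 3x96x96 per image)
images = torch.randn(160, 3, 96, 96)
labels = torch.zeros(160, dtype=torch.long)
test_set = TensorDataset(images, labels)

# Split the held-out images evenly into validation and test subsets
val_set, te_set = random_split(test_set, [80, 80],
                               generator=torch.Generator().manual_seed(0))
dataloader_val = DataLoader(val_set, batch_size=64)
dataloader_te = DataLoader(te_set, batch_size=64)
print(len(dataloader_val), len(dataloader_te))  # 2 2
```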
Reference:
DataLoader: https://docs.pytorch.org/tutorials/beginner/basics/data_tutorial.html
The network is an autoencoder which is defined in a paragraph above.
An optimizer is selected for optimizing the network parameters at each training epoch.
network = Autoencoder(Encoder(), Decoder())
optimizer = torch.optim.Adam(network.parameters(), lr=1e-3)
It is required to initialize the network's weights. The function init_weights is defined first, then applied to every submodule:
def init_weights(module):
    if isinstance(module, nn.Conv2d):
        torch.nn.init.xavier_uniform_(module.weight)
        module.bias.data.fill_(0.01)
    elif isinstance(module, nn.Linear):
        torch.nn.init.xavier_uniform_(module.weight)
        module.bias.data.fill_(0.01)

network.apply(init_weights)
Set the module in training mode.
network.train()
Reference:
Training Mode: https://docs.pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.train
Use DataLoader to train the network in batches:
At each training epoch, the dataloader loads the data samples in batches. For example, with 5000 training samples in the dataloader and a batch size of 64, there are ceil(5000/64) = 79 iterations of training at each epoch.
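The batch count per epoch can be confirmed with a stand-in dataset of 5000 samples (the DataLoader reports its number of batches via len()):

```python
import math
import torch
from torch.utils.data import DataLoader, TensorDataset

# 5000 samples at batch size 64: the last, partial batch still counts
ds = TensorDataset(torch.zeros(5000, 1))
loader = DataLoader(ds, batch_size=64)
print(len(loader), math.ceil(5000 / 64))  # 79 79
```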
Use tqdm to show training progress:
tqdm is a Python library that displays smart progress meters for loops and iterations, commonly used in PyTorch to monitor training and evaluation loops. It integrates easily into PyTorch training loops by wrapping tqdm() around any iterable, such as a DataLoader.
Use a loss function to calculate the training loss:
The loss function calculates the loss between the original image and the reconstructed image. In this project, MSE loss is used to represent the training loss.
References:
tqdm: https://tqdm.github.io/
Mean Square Error: https://docs.pytorch.org/docs/stable/generated/torch.nn.MSELoss.html
import numpy as np
from tqdm.notebook import tqdm

train_losses = []
for images, _ in tqdm(train_loader):
    # Zeroing gradients
    optimizer.zero_grad()
    # Reconstructing images
    output = network(images)
    # Computing loss
    loss = loss_function(output, images.view(-1, QtyColor, imgEdge, imgEdge))
    # Calculating gradients
    loss.backward()
    # Optimizing weights
    optimizer.step()
    # Collect the loss value in the list train_losses[]
    train_losses.append(loss.item())

# Take the average loss value over the iterations of training
# and collect this average value at each epoch
loss_trmean = np.mean(train_losses)
log_loss['avg_training_loss_perEpoch'].append(loss_trmean)
At each epoch, we apply the trained network to the test images in order to obtain loss values for samples that were not used to train the network. The same routine can be applied to the validation images.
# ------------
# TEST
# ------------
test_losses = []
for test_images, _ in tqdm(test_loader):
    # Disable gradient calculation for evaluation
    with torch.no_grad():
        # Obtain the decoded image from the trained network
        output = network(test_images)
        # Compare the original image with the decoded image to calculate the loss value
        test_loss = loss_function(output, test_images.view(-1, QtyColor, imgEdge, imgEdge))
        test_losses.append(test_loss.item())

loss_testmean = np.mean(test_losses)
log_loss['avg_test_loss_perEpoch'].append(loss_testmean)
epochQty=121
model = Autoencoder(Encoder(), Decoder())
log_loss = model.train(nn.MSELoss(), epochs=epochQty, batch_size=64, train_loader=dataloader_tr, val_loader=dataloader_val, test_loader=dataloader_te)
There are four training configurations:
The chart below shows:
# The dropout layers were added to the encoder as shown below:
self.net = nn.Sequential(
    nn.Conv2d(in_channels, out_channels, 3, padding=1),  # (96, 96)
    act_fn,
    nn.Conv2d(out_channels, out_channels, 3, padding=1),
    act_fn,
    # Dropout layer
    nn.Dropout2d(p=0.2),
    nn.Conv2d(out_channels, 2 * out_channels, 3, padding=1, stride=2),  # (48, 48)
    act_fn,
    nn.Conv2d(2 * out_channels, 2 * out_channels, 3, padding=1),
    act_fn,
    # Dropout layer
    nn.Dropout2d(p=0.2),
    nn.Conv2d(2 * out_channels, 4 * out_channels, 3, padding=1, stride=2),  # (24, 24)
    act_fn,
    nn.Conv2d(4 * out_channels, 4 * out_channels, 3, padding=1),
    act_fn,
    nn.Conv2d(4 * out_channels, 8 * out_channels, 3, padding=1, stride=2),  # (12, 12)
    act_fn,
    nn.Conv2d(8 * out_channels, 8 * out_channels, 3, padding=1),
    act_fn,
    nn.Flatten(),
    nn.Linear(TotalReduction * out_channels * HiddenSize * HiddenSize, latent_dim),
    act_fn
)
# The dropout layers were symmetrically added to the decoder below:
self.conv = nn.Sequential(
    nn.ConvTranspose2d(8 * out_channels, 8 * out_channels, 3, padding=1),  # (12, 12)
    act_fn,
    nn.ConvTranspose2d(8 * out_channels, 4 * out_channels, 3, padding=1, stride=2, output_padding=1),  # (24, 24)
    act_fn,
    nn.ConvTranspose2d(4 * out_channels, 4 * out_channels, 3, padding=1),
    act_fn,
    # Dropout layer
    nn.Dropout2d(p=0.2),
    nn.ConvTranspose2d(4 * out_channels, 2 * out_channels, 3, padding=1, stride=2, output_padding=1),  # (48, 48)
    act_fn,
    nn.ConvTranspose2d(2 * out_channels, 2 * out_channels, 3, padding=1),
    act_fn,
    # Dropout layer
    nn.Dropout2d(p=0.2),
    nn.ConvTranspose2d(2 * out_channels, out_channels, 3, padding=1, stride=2, output_padding=1),  # (96, 96)
    act_fn,
    nn.ConvTranspose2d(out_channels, out_channels, 3, padding=1),
    act_fn,
    nn.ConvTranspose2d(out_channels, in_channels, 3, padding=1)
)