Discrete Cosine Transform

Code

import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
from skimage import data
from io import BytesIO
import PIL.Image
import plotly.graph_objects as go
from typing import Optional

1 Introduction

This notebook illustrates the concepts behind JPEG compression by employing the 2D Discrete Cosine Transform (DCT) on a grayscale image. The algorithm operates by segmenting the image into smaller blocks, computing the DCT coefficients for each, and performing thresholding to zero out insignificant high-frequency coefficients. This process takes advantage of the fact that many coefficients, particularly those representing high frequencies, can be negligible and discarded without significantly affecting the perceived image quality, thus facilitating compression.

Image Partitioning

For an image \(A \in \mathbb{R}^{M \times N}\), we divide it into non-overlapping blocks of size \(n \times n\) (typically \(n = 8\)).

2D DCT for a Block

The transform for a block \(B\) is defined as:

\[ c_{k,l} = \alpha(k)\alpha(l) \sum_{r=0}^{n-1} \sum_{s=0}^{n-1} B_{r,s} \cos\left(\frac{\pi (2r+1)k}{2n}\right) \cos\left(\frac{\pi (2s+1)l}{2n}\right) \]

where the scaling factor \(\alpha(k)\) is given by:

\[ \alpha(k) = \begin{cases} \sqrt{\frac{1}{n}}, & k = 0, \\ \sqrt{\frac{2}{n}}, & k > 0. \end{cases} \]

Inverse DCT (IDCT)

Reconstructed via:

\[ B_{r,s} = \sum_{k=0}^{n-1} \sum_{l=0}^{n-1} \alpha(k)\alpha(l) \, c_{k,l} \cos\left(\frac{\pi (2r+1)k}{2n}\right) \cos\left(\frac{\pi (2s+1)l}{2n}\right). \]

Thresholding

Through thresholding, small coefficients are nullified, maintaining core low-frequency content while reducing data size.

2 Python Implementation

The dct_matrix function constructs the DCT transformation matrix \(T\) with dimensions \(n \times n\), whose elements are derived as:

\[ T_{k,r} = \alpha(k) \cos\left(\frac{\pi (2r+1)k}{2n}\right) \]

The orthonormal property of \(T\) ensures that the DCT can be expressed as:

\[ C = T \, B \, T^\top \]

Consequently, we retrieve the original block through:

\[ B = T^\top \, C \, T \]

Code

def dct_matrix(n: int) -> np.ndarray:
    """
    Generate the DCT transformation matrix of size n x n.

    Args:
        n: Block size.

    Returns:
        A numpy array representing the DCT matrix.
    """
    T = np.empty((n, n))
    factor = np.pi / (2 * n)
    for k in range(n):
        for r in range(n):
            alpha = np.sqrt(1 / n) if k == 0 else np.sqrt(2 / n)
            T[k, r] = alpha * np.cos((2 * r + 1) * k * factor)
    return T


def dct2(block: np.ndarray) -> np.ndarray:
    """
    Compute the 2D DCT of an n x n block.

    Args:
        block: A 2D numpy array.

    Returns:
        The DCT coefficients as a 2D numpy array.
    """
    n = block.shape[0]
    T = dct_matrix(n)
    return T @ block @ T.T


def idct2(coeff: np.ndarray) -> np.ndarray:
    """
    Compute the 2D inverse DCT of an n x n coefficient block.

    Args:
        coeff: A 2D numpy array of DCT coefficients.

    Returns:
        The reconstructed block as a 2D numpy array.
    """
    n = coeff.shape[0]
    T = dct_matrix(n)
    return T.T @ coeff @ T

To decompress an image, it is segmented into blocks, which are processed individually before reassembling the full image.

Code

def image_to_blocks(img: np.ndarray, block_size: int) -> list:
    """
    Decompose the image into non-overlapping blocks of size block_size x block_size.

    Args:
        img: A 2D numpy array representing the grayscale image.
        block_size: The size of each block.

    Returns:
        A list of lists of blocks.
    """
    M, N = img.shape
    blocks = []
    for i in range(0, M, block_size):
        row = []
        for j in range(0, N, block_size):
            block = img[i : i + block_size, j : j + block_size]
            # Pad if necessary so that every block is block_size x block_size.
            if block.shape != (block_size, block_size):
                block = np.pad(
                    block,
                    (
                        (0, block_size - block.shape[0]),
                        (0, block_size - block.shape[1]),
                    ),
                    mode="constant",
                )
            row.append(block.astype(np.float32))
        blocks.append(row)
    return blocks


def blocks_to_image(blocks: list) -> np.ndarray:
    """
    Reconstruct the image from blocks.

    Args:
        blocks: A list of lists of 2D numpy arrays.

    Returns:
        The reconstructed image as a 2D numpy array.
    """
    row_images = [np.hstack(row) for row in blocks]
    return np.vstack(row_images)


def apply_dct_to_blocks(blocks: list) -> list:
    """
    Apply 2D DCT to each image block.

    Args:
        blocks: A list of lists of image blocks.

    Returns:
        A list of lists with the DCT-transformed blocks.
    """
    return [[dct2(block) for block in row] for row in blocks]


def apply_idct_to_blocks(blocks: list) -> list:
    """
    Apply 2D inverse DCT to each block.

    Args:
        blocks: A list of lists of DCT coefficient blocks.

    Returns:
        A list of lists with the inverse-transformed blocks.
    """
    return [[idct2(block) for block in row] for row in blocks]

The threshold_coefficients function implements the above-discussed thresholding, enabling compression by zeroing out small coefficients while preserving important information.

Code

def threshold_coefficients(dct_blocks: list, threshold: float) -> tuple:
    """
    Apply thresholding to DCT coefficients in each block.

    Args:
        dct_blocks: A list of lists of DCT coefficient blocks.
        threshold: The threshold value. Coefficients with absolute value below this are set to zero.

    Returns:
        A tuple containing:
         - The thresholded blocks.
         - The total count of nonzero coefficients.
    """
    nonzero_count = 0
    new_blocks = []
    for row in dct_blocks:
        new_row = []
        for block in row:
            mask = np.abs(block) > threshold
            nonzero_count += np.count_nonzero(mask)
            new_row.append(block * mask)
        new_blocks.append(new_row)
    return new_blocks, nonzero_count

Code

def load_input_image(
    uploaded_image: Optional[object], sample_choice: str
) -> np.ndarray:
    """
    Load the input image from an upload or sample selection and convert it to grayscale.

    Args:
        uploaded_image: The uploaded image (as a numpy array or PIL.Image) or None.
        sample_choice: The name of the sample image to use if no upload is provided.

    Returns:
        A 2D numpy array representing the grayscale image.
    """
    if uploaded_image is not None:
        # Check if the uploaded image is a numpy array
        if isinstance(uploaded_image, np.ndarray):
            img = uploaded_image
        else:
            # Assume it's a PIL image
            img = np.array(uploaded_image)
        # Convert to grayscale if needed (if image has 3 channels)
        if img.ndim == 3:
            if img.shape[2] >= 3:
                # Use standard luminance conversion
                img = np.dot(img[..., :3], [0.2989, 0.5870, 0.1140])
            else:
                img = img[..., 0]
        return img.astype(np.uint8)
    else:
        # Load sample image from skimage.data
        if sample_choice.lower() == "cameraman":
            return data.camera()
        elif sample_choice.lower() == "coins":
            return data.coins()
        elif sample_choice.lower() == "moon":
            return data.moon()
        else:
            # Default to cameraman if unknown choice.
            return data.camera()

Code

def plotly_heatmap(coeff_block: np.ndarray) -> go.Figure:
    """
    Generate a Plotly heatmap for a given DCT coefficient block.

    Args:
        coeff_block: A 2D numpy array of DCT coefficients.

    Returns:
        A Plotly Figure object displaying the heatmap.
    """
    fig = go.Figure(data=go.Heatmap(z=coeff_block, colorscale="Viridis"))
    fig.update_layout(
        title="DCT Coefficients (First Block)",
        xaxis_title="Coefficient index",
        yaxis_title="Coefficient index",
        margin=dict(l=20, r=20, t=40, b=20),
    )
    return fig

process_image orchestrates the entire workflow: from loading the image, transforming it with DCT, thresholding, reconstructing using IDCT, and generating the heatmap and statistics.

Code

def process_image(
    threshold: float,
    block_size: int = 8,
    uploaded_image: Optional[object] = None,
    sample_choice: str = "cameraman",
) -> tuple:
    """
    Process the input image using block-wise 2D DCT, thresholding, and IDCT reconstruction.

    Args:
        threshold: The threshold value for the DCT coefficients.
        block_size: The size of each block (default 8).
        uploaded_image: The uploaded image (if any).
        sample_choice: The sample image to use if no upload is provided.

    Returns:
        A tuple containing:
         - The original image (as a 2D numpy array).
         - The reconstructed image after thresholding.
         - A Plotly Figure object for the DCT coefficients heatmap (first block).
         - A JSON-serializable dictionary with compression statistics.
    """
    # Load input image (upload takes precedence over sample choice)
    img = load_input_image(uploaded_image, sample_choice)
    M, N = img.shape
    # Crop the image so that its dimensions are multiples of block_size
    M_new = (M // block_size) * block_size
    N_new = (N // block_size) * block_size
    img = img[:M_new, :N_new]

    # Partition the image into blocks
    blocks = image_to_blocks(img, block_size)

    # Apply the 2D DCT to each block
    dct_blocks = apply_dct_to_blocks(blocks)

    total_coeffs = M_new * N_new  # one coefficient per pixel

    # Threshold the DCT coefficients
    thresh_blocks, nonzero_count = threshold_coefficients(dct_blocks, threshold)

    # Reconstruct the image using inverse DCT
    idct_blocks = apply_idct_to_blocks(thresh_blocks)
    reconstructed = blocks_to_image(idct_blocks)

    # Prepare a summary of the compression statistics
    summary = {
        "block_size": block_size,
        "image_shape": {"rows": int(M_new), "cols": int(N_new)},
        "total_coefficients": int(total_coeffs),
        "nonzero_coefficients": int(nonzero_count),
        "compression_ratio": float(nonzero_count / total_coeffs),
    }

    # Generate a responsive Plotly heatmap for the DCT coefficients of the first block
    first_block = dct_blocks[0][0]
    heatmap_fig = plotly_heatmap(first_block)

    return img, reconstructed, heatmap_fig, summary

3 Interactive Dashboard

The dashboard below enables the adjustment of various compression-related parameters:

Threshold: Minimum coefficient value retained during compression.
Block Size: Controls the granularity of the transformation.
Image Input: Upload an image or select from sample options like cameraman, coins, or moon.

You’ll observe:

Original vs Reconstructed Images: Visual comparison post-compression.
DCT Coefficients Heatmap: First block coefficient visualization using Plotly.
Compression Statistics: JSON data illustrating nonzero coefficients and compression ratios.

Code

def gradio_interface(
    threshold: float,
    block_size: int,
    uploaded_image: Optional[object],
    sample_choice: str,
) -> tuple:
    """
    Gradio interface function to process the image and return outputs.

    Args:
        threshold: The threshold value for DCT coefficients.
        block_size: The block size for image decomposition.
        uploaded_image: The uploaded image (if provided).
        sample_choice: The sample image to use if no upload is provided.

    Returns:
        A tuple containing:
         - A combined image (side-by-side original and reconstructed).
         - A Plotly Figure for the DCT coefficients heatmap.
         - A dictionary with compression statistics.
    """
    original, reconstructed, heatmap_fig, summary = process_image(
        threshold, block_size, uploaded_image, sample_choice
    )

    # Create a side-by-side visualization of the original and reconstructed images.
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    axes[0].imshow(original, cmap="gray")
    axes[0].set_title("Original Image")
    axes[0].axis("off")
    axes[1].imshow(reconstructed, cmap="gray")
    axes[1].set_title("Reconstructed Image")
    axes[1].axis("off")
    plt.tight_layout()
    buf = BytesIO()
    plt.savefig(buf, format="png")
    buf.seek(0)
    combined_img = PIL.Image.open(buf)
    plt.close()

    return combined_img, heatmap_fig, summary


with gr.Blocks(
    css="""gradio-app {background: #222222 !important}""",
    title="JPEG Compression via Discrete Cosine Transform (DCT)",
) as demo:
    with gr.Row():
        threshold_slider = gr.Slider(
            minimum=0, maximum=100, step=0.5, value=75, label="Threshold"
        )
        block_size_input = gr.Number(value=8, label="Block Size (n x n)", precision=0)

    uploaded_image_input = gr.Image(type="numpy", label="Upload your image (optional)")
    sample_choice_input = gr.Dropdown(
        choices=["cameraman", "coins", "moon"],
        value="cameraman",
        label="Or select a sample image",
    )

    output_combined = gr.Image(label="Original vs Reconstructed Image")
    output_heatmap = gr.Plot(label="DCT Coefficients Heatmap (First Block)")
    output_stats = gr.JSON(label="Compression Statistics")

    btn = gr.Button("Apply Compression")

    kwargs = dict(
        fn=gradio_interface,
        inputs=[
            threshold_slider,
            block_size_input,
            uploaded_image_input,
            sample_choice_input,
        ],
        outputs=[output_combined, output_heatmap, output_stats],
    )
    btn.click(**kwargs)
    demo.load(**kwargs)

Code

demo.launch(pwa=True, show_api=False, show_error=True)

Code

# Output of this cell set dynamically in Quarto filter step

import micropip await micropip.install('plotly==5.24.1'); import numpy as np import matplotlib.pyplot as plt import gradio as gr from skimage import data from io import BytesIO import PIL.Image import plotly.graph_objects as go from typing import Optional def dct_matrix(n: int) -> np.ndarray: """ Generate the DCT transformation matrix of size n x n. Args: n: Block size. Returns: A numpy array representing the DCT matrix. """ T = np.empty((n, n)) factor = np.pi / (2 * n) for k in range(n): for r in range(n): alpha = np.sqrt(1 / n) if k == 0 else np.sqrt(2 / n) T[k, r] = alpha * np.cos((2 * r + 1) * k * factor) return T def dct2(block: np.ndarray) -> np.ndarray: """ Compute the 2D DCT of an n x n block. Args: block: A 2D numpy array. Returns: The DCT coefficients as a 2D numpy array. """ n = block.shape[0] T = dct_matrix(n) return T @ block @ T.T def idct2(coeff: np.ndarray) -> np.ndarray: """ Compute the 2D inverse DCT of an n x n coefficient block. Args: coeff: A 2D numpy array of DCT coefficients. Returns: The reconstructed block as a 2D numpy array. """ n = coeff.shape[0] T = dct_matrix(n) return T.T @ coeff @ T def image_to_blocks(img: np.ndarray, block_size: int) -> list: """ Decompose the image into non-overlapping blocks of size block_size x block_size. Args: img: A 2D numpy array representing the grayscale image. block_size: The size of each block. Returns: A list of lists of blocks. """ M, N = img.shape blocks = [] for i in range(0, M, block_size): row = [] for j in range(0, N, block_size): block = img[i : i + block_size, j : j + block_size] # Pad if necessary so that every block is block_size x block_size. if block.shape != (block_size, block_size): block = np.pad( block, ( (0, block_size - block.shape[0]), (0, block_size - block.shape[1]), ), mode="constant", ) row.append(block.astype(np.float32)) blocks.append(row) return blocks def blocks_to_image(blocks: list) -> np.ndarray: """ Reconstruct the image from blocks. Args: blocks: A list of lists of 2D numpy arrays. Returns: The reconstructed image as a 2D numpy array. """ row_images = [np.hstack(row) for row in blocks] return np.vstack(row_images) def apply_dct_to_blocks(blocks: list) -> list: """ Apply 2D DCT to each image block. Args: blocks: A list of lists of image blocks. Returns: A list of lists with the DCT-transformed blocks. """ return [[dct2(block) for block in row] for row in blocks] def apply_idct_to_blocks(blocks: list) -> list: """ Apply 2D inverse DCT to each block. Args: blocks: A list of lists of DCT coefficient blocks. Returns: A list of lists with the inverse-transformed blocks. """ return [[idct2(block) for block in row] for row in blocks] def threshold_coefficients(dct_blocks: list, threshold: float) -> tuple: """ Apply thresholding to DCT coefficients in each block. Args: dct_blocks: A list of lists of DCT coefficient blocks. threshold: The threshold value. Coefficients with absolute value below this are set to zero. Returns: A tuple containing: - The thresholded blocks. - The total count of nonzero coefficients. """ nonzero_count = 0 new_blocks = [] for row in dct_blocks: new_row = [] for block in row: mask = np.abs(block) > threshold nonzero_count += np.count_nonzero(mask) new_row.append(block * mask) new_blocks.append(new_row) return new_blocks, nonzero_count def load_input_image( uploaded_image: Optional[object], sample_choice: str ) -> np.ndarray: """ Load the input image from an upload or sample selection and convert it to grayscale. Args: uploaded_image: The uploaded image (as a numpy array or PIL.Image) or None. sample_choice: The name of the sample image to use if no upload is provided. Returns: A 2D numpy array representing the grayscale image. """ if uploaded_image is not None: # Check if the uploaded image is a numpy array if isinstance(uploaded_image, np.ndarray): img = uploaded_image else: # Assume it's a PIL image img = np.array(uploaded_image) # Convert to grayscale if needed (if image has 3 channels) if img.ndim == 3: if img.shape[2] >= 3: # Use standard luminance conversion img = np.dot(img[..., :3], [0.2989, 0.5870, 0.1140]) else: img = img[..., 0] return img.astype(np.uint8) else: # Load sample image from skimage.data if sample_choice.lower() == "cameraman": return data.camera() elif sample_choice.lower() == "coins": return data.coins() elif sample_choice.lower() == "moon": return data.moon() else: # Default to cameraman if unknown choice. return data.camera() def plotly_heatmap(coeff_block: np.ndarray) -> go.Figure: """ Generate a Plotly heatmap for a given DCT coefficient block. Args: coeff_block: A 2D numpy array of DCT coefficients. Returns: A Plotly Figure object displaying the heatmap. """ fig = go.Figure(data=go.Heatmap(z=coeff_block, colorscale="Viridis")) fig.update_layout( title="DCT Coefficients (First Block)", xaxis_title="Coefficient index", yaxis_title="Coefficient index", margin=dict(l=20, r=20, t=40, b=20), ) return fig def process_image( threshold: float, block_size: int = 8, uploaded_image: Optional[object] = None, sample_choice: str = "cameraman", ) -> tuple: """ Process the input image using block-wise 2D DCT, thresholding, and IDCT reconstruction. Args: threshold: The threshold value for the DCT coefficients. block_size: The size of each block (default 8). uploaded_image: The uploaded image (if any). sample_choice: The sample image to use if no upload is provided. Returns: A tuple containing: - The original image (as a 2D numpy array). - The reconstructed image after thresholding. - A Plotly Figure object for the DCT coefficients heatmap (first block). - A JSON-serializable dictionary with compression statistics. """ # Load input image (upload takes precedence over sample choice) img = load_input_image(uploaded_image, sample_choice) M, N = img.shape # Crop the image so that its dimensions are multiples of block_size M_new = (M // block_size) * block_size N_new = (N // block_size) * block_size img = img[:M_new, :N_new] # Partition the image into blocks blocks = image_to_blocks(img, block_size) # Apply the 2D DCT to each block dct_blocks = apply_dct_to_blocks(blocks) total_coeffs = M_new * N_new # one coefficient per pixel # Threshold the DCT coefficients thresh_blocks, nonzero_count = threshold_coefficients(dct_blocks, threshold) # Reconstruct the image using inverse DCT idct_blocks = apply_idct_to_blocks(thresh_blocks) reconstructed = blocks_to_image(idct_blocks) # Prepare a summary of the compression statistics summary = { "block_size": block_size, "image_shape": {"rows": int(M_new), "cols": int(N_new)}, "total_coefficients": int(total_coeffs), "nonzero_coefficients": int(nonzero_count), "compression_ratio": float(nonzero_count / total_coeffs), } # Generate a responsive Plotly heatmap for the DCT coefficients of the first block first_block = dct_blocks[0][0] heatmap_fig = plotly_heatmap(first_block) return img, reconstructed, heatmap_fig, summary def gradio_interface( threshold: float, block_size: int, uploaded_image: Optional[object], sample_choice: str, ) -> tuple: """ Gradio interface function to process the image and return outputs. Args: threshold: The threshold value for DCT coefficients. block_size: The block size for image decomposition. uploaded_image: The uploaded image (if provided). sample_choice: The sample image to use if no upload is provided. Returns: A tuple containing: - A combined image (side-by-side original and reconstructed). - A Plotly Figure for the DCT coefficients heatmap. - A dictionary with compression statistics. """ original, reconstructed, heatmap_fig, summary = process_image( threshold, block_size, uploaded_image, sample_choice ) # Create a side-by-side visualization of the original and reconstructed images. fig, axes = plt.subplots(1, 2, figsize=(10, 5)) axes[0].imshow(original, cmap="gray") axes[0].set_title("Original Image") axes[0].axis("off") axes[1].imshow(reconstructed, cmap="gray") axes[1].set_title("Reconstructed Image") axes[1].axis("off") plt.tight_layout() buf = BytesIO() plt.savefig(buf, format="png") buf.seek(0) combined_img = PIL.Image.open(buf) plt.close() return combined_img, heatmap_fig, summary with gr.Blocks( css="""gradio-app {background: #222222 !important}""", title="JPEG Compression via Discrete Cosine Transform (DCT)", ) as demo: with gr.Row(): threshold_slider = gr.Slider( minimum=0, maximum=100, step=0.5, value=75, label="Threshold" ) block_size_input = gr.Number(value=8, label="Block Size (n x n)", precision=0) uploaded_image_input = gr.Image(type="numpy", label="Upload your image (optional)") sample_choice_input = gr.Dropdown( choices=["cameraman", "coins", "moon"], value="cameraman", label="Or select a sample image", ) output_combined = gr.Image(label="Original vs Reconstructed Image") output_heatmap = gr.Plot(label="DCT Coefficients Heatmap (First Block)") output_stats = gr.JSON(label="Compression Statistics") btn = gr.Button("Apply Compression") kwargs = dict( fn=gradio_interface, inputs=[ threshold_slider, block_size_input, uploaded_image_input, sample_choice_input, ], outputs=[output_combined, output_heatmap, output_stats], ) btn.click(**kwargs) demo.load(**kwargs) demo.launch(pwa=True, show_api=False, show_error=True)