diff --git a/Localization/particle_filter/particle_filter.py b/Localization/particle_filter/particle_filter.py
index ba54a3d12b..9aa83f48ea 100644
--- a/Localization/particle_filter/particle_filter.py
+++ b/Localization/particle_filter/particle_filter.py
@@ -1,9 +1,21 @@
 """
+Particle Filter Localization Sample
 
-Particle Filter localization sample
+This module implements Monte Carlo Localization (MCL) using a particle filter.
+The particle filter is a non-parametric implementation of the Bayes filter
+that represents the belief by a set of random state samples (particles).
 
-author: Atsushi Sakai (@Atsushi_twi)
+Key Features:
+- Non-parametric: Can represent multi-modal distributions
+- Handles non-linear motion and observation models
+- Uses importance sampling and resampling
+
+Algorithm Overview:
+1. Prediction: Propagate particles using motion model with noise
+2. Update: Weight particles based on observation likelihood
+3. Resampling: Resample particles based on weights to avoid degeneracy
 
+author: Atsushi Sakai (@Atsushi_twi)
 """
 import sys
 import pathlib
@@ -16,75 +28,146 @@ from utils.angle import rot_mat_2d
 
-# Estimation parameter of PF
-Q = np.diag([0.2]) ** 2  # range error
-R = np.diag([2.0, np.deg2rad(40.0)]) ** 2  # input error
+# Estimation parameters for Particle Filter
+Q = np.diag([0.2]) ** 2  # Observation noise covariance: range measurement error [m^2]
+R = np.diag([2.0, np.deg2rad(40.0)]) ** 2  # Process noise covariance: [velocity, yaw_rate] error
 
-# Simulation parameter
-Q_sim = np.diag([0.2]) ** 2
-R_sim = np.diag([1.0, np.deg2rad(30.0)]) ** 2
+# Simulation parameters (ground truth noise)
+Q_sim = np.diag([0.2]) ** 2  # Simulation observation noise [m^2]
+R_sim = np.diag([1.0, np.deg2rad(30.0)]) ** 2  # Simulation process noise
 
-DT = 0.1  # time tick [s]
-SIM_TIME = 50.0  # simulation time [s]
-MAX_RANGE = 20.0  # maximum observation range
+DT = 0.1  # Time step [s]
+SIM_TIME = 50.0  # Total simulation time [s]
+MAX_RANGE = 20.0  # Maximum observation range for RFID sensors [m]
 
-# Particle filter parameter
-NP = 100  # Number of Particle
-NTh = NP / 2.0  # Number of particle for re-sampling
+# Particle filter parameters
+NP = 100  # Number of particles - more particles = better accuracy but slower
+NTh = NP / 2.0  # Resampling threshold based on effective particle number
 
 show_animation = True
 
 
 def calc_input():
-    v = 1.0  # [m/s]
-    yaw_rate = 0.1  # [rad/s]
+    """
+    Calculate control input for the robot.
+
+    Returns:
+        u: Control input vector [velocity, yaw_rate]^T
+           - velocity: forward velocity [m/s]
+           - yaw_rate: angular velocity [rad/s]
+    """
+    v = 1.0  # Forward velocity [m/s]
+    yaw_rate = 0.1  # Angular velocity (yaw rate) [rad/s]
     u = np.array([[v, yaw_rate]]).T
     return u
 
 
 def observation(x_true, xd, u, rf_id):
+    """
+    Simulate observation and dead reckoning with noise.
+
+    This function simulates:
+    1. Ground truth motion with perfect control input
+    2. Range observations to RFID landmarks with noise
+    3. Dead reckoning with noisy control input
+
+    Args:
+        x_true: True state vector [x, y, yaw, v]^T
+        xd: Dead reckoning state vector
+        u: Control input [v, yaw_rate]^T
+        rf_id: RFID landmark positions [[x1, y1], [x2, y2], ...]
+
+    Returns:
+        x_true: Updated true state
+        z: Observations [range, landmark_x, landmark_y] for visible landmarks
+        xd: Updated dead reckoning state
+        ud: Noisy control input used for dead reckoning
+    """
+    # Update true state with perfect control input
     x_true = motion_model(x_true, u)
 
-    # add noise to gps x-y
-    z = np.zeros((0, 3))
+    # Generate range observations to RFID landmarks
+    z = np.zeros((0, 3))  # Initialize empty observation array
 
     for i in range(len(rf_id[:, 0])):
-
+        # Calculate distance from robot to landmark
         dx = x_true[0, 0] - rf_id[i, 0]
         dy = x_true[1, 0] - rf_id[i, 1]
         d = math.hypot(dx, dy)
+
+        # Only observe landmarks within sensor range
         if d <= MAX_RANGE:
-            dn = d + np.random.randn() * Q_sim[0, 0] ** 0.5  # add noise
+            dn = d + np.random.randn() * Q_sim[0, 0] ** 0.5  # Add Gaussian noise to range
             zi = np.array([[dn, rf_id[i, 0], rf_id[i, 1]]])
             z = np.vstack((z, zi))
 
-    # add noise to input
-    ud1 = u[0, 0] + np.random.randn() * R_sim[0, 0] ** 0.5
-    ud2 = u[1, 0] + np.random.randn() * R_sim[1, 1] ** 0.5
+    # Add noise to control input for dead reckoning simulation
+    ud1 = u[0, 0] + np.random.randn() * R_sim[0, 0] ** 0.5  # Noisy velocity
+    ud2 = u[1, 0] + np.random.randn() * R_sim[1, 1] ** 0.5  # Noisy yaw rate
     ud = np.array([[ud1, ud2]]).T
 
+    # Update dead reckoning estimate using noisy input
    xd = motion_model(xd, ud)
 
     return x_true, z, xd, ud
 
 
 def motion_model(x, u):
+    """
+    Motion model for robot kinematics.
+
+    Implements a simple 2D kinematic model:
+        x' = x + v*cos(yaw)*dt
+        y' = y + v*sin(yaw)*dt
+        yaw' = yaw + yaw_rate*dt
+        v' = v (the commanded velocity from u; the previous velocity state
+                is discarded, since the last row of F is zero)
+
+    The model is expressed as: x_{t+1} = F*x_t + B*u_t
+
+    Args:
+        x: State vector [x, y, yaw, v]^T
+           - x, y: 2D position [m]
+           - yaw: orientation [rad]
+           - v: velocity [m/s]
+        u: Control input [v, yaw_rate]^T
+
+    Returns:
+        x: Next state vector [x, y, yaw, v]^T
+    """
+    # State transition matrix (identity for position/orientation, zero for velocity)
     F = np.array([[1.0, 0, 0, 0],
                   [0, 1.0, 0, 0],
                   [0, 0, 1.0, 0],
                   [0, 0, 0, 0]])
 
-    B = np.array([[DT * math.cos(x[2, 0]), 0],
-                  [DT * math.sin(x[2, 0]), 0],
-                  [0.0, DT],
-                  [1.0, 0.0]])
+    # Control input matrix (converts velocity and yaw_rate to state changes)
+    B = np.array([[DT * math.cos(x[2, 0]), 0],  # x position change
+                  [DT * math.sin(x[2, 0]), 0],  # y position change
+                  [0.0, DT],                    # yaw change
+                  [1.0, 0.0]])                  # velocity update
 
+    # Apply motion model: x = F*x + B*u
     x = F.dot(x) + B.dot(u)
 
     return x
 
 
 def gauss_likelihood(x, sigma):
+    """
+    Calculate Gaussian likelihood (probability density).
+
+    This is used to compute the importance weight of each particle
+    based on how well its predicted observation matches the actual observation.
+
+    Formula: p(x) = (1/sqrt(2*pi*sigma^2)) * exp(-x^2 / (2*sigma^2))
+
+    Args:
+        x: Innovation (difference between predicted and actual observation)
+        sigma: Standard deviation of observation noise
+
+    Returns:
+        p: Likelihood (probability density) of the observation
+    """
     p = 1.0 / math.sqrt(2.0 * math.pi * sigma ** 2) * \
         math.exp(-x ** 2 / (2 * sigma ** 2))
 
@@ -93,14 +176,32 @@ def gauss_likelihood(x, sigma):
 
 def calc_covariance(x_est, px, pw):
     """
-    calculate covariance matrix
-    see ipynb doc
+    Calculate covariance matrix from weighted particles.
+
+    The covariance is computed using the weighted sample covariance formula
+    with Bessel's correction for weighted samples:
+
+        Cov = (1/(1-sum(w_i^2))) * sum(w_i * (x_i - x_mean) * (x_i - x_mean)^T)
+
+    This accounts for the effective sample size of weighted particles.
+
+    Args:
+        x_est: Estimated state (weighted mean of particles) [4x1]
+        px: Particle states [4xNP] where each column is a particle
+        pw: Particle weights [1xNP], should sum to 1.0
+
+    Returns:
+        cov: Covariance matrix [4x4] representing uncertainty in the estimate
     """
     cov = np.zeros((4, 4))
     n_particle = px.shape[1]
+
+    # Weighted covariance calculation
     for i in range(n_particle):
-        dx = (px[:, i:i + 1] - x_est)
-        cov += pw[0, i] * dx @ dx.T
+        dx = (px[:, i:i + 1] - x_est)  # Deviation from mean
+        cov += pw[0, i] * dx @ dx.T  # Weighted outer product
+
+    # Bessel's correction for weighted samples
     cov *= 1.0 / (1.0 - pw @ pw.T)
 
     return cov
 
@@ -108,66 +209,152 @@ def calc_covariance(x_est, px, pw):
 
 def pf_localization(px, pw, z, u):
     """
-    Localization with Particle filter
+    Particle Filter Localization - Main algorithm.
+
+    This implements the standard particle filter algorithm:
+    1. Prediction Step: Propagate each particle using motion model with noise
+    2. Update Step: Weight particles based on observation likelihood
+    3. Resampling Step: Resample if effective particle number is too low
+
+    Algorithm Steps:
+    - For each particle:
+        - Sample noisy control input from motion noise distribution
+        - Predict new particle state using motion model
+        - Calculate importance weight based on observation likelihood
+    - Normalize weights
+    - Estimate state as weighted mean of particles
+    - Resample if particle degeneracy detected (low N_eff)
+
+    Args:
+        px: Particle states [4xNP], each column represents one particle [x,y,yaw,v]^T
+        pw: Particle weights [1xNP], importance weights summing to 1.0
+        z: Observations [Mx3] where M is number of visible landmarks
+           Each row: [range, landmark_x, landmark_y]
+        u: Control input [v, yaw_rate]^T
+
+    Returns:
+        x_est: Estimated state [4x1] - weighted mean of particles
+        p_est: Estimated covariance [4x4] - uncertainty of estimate
+        px: Updated particle states (after potential resampling)
+        pw: Updated particle weights (after potential resampling)
     """
+    # ========== Prediction and Update Step ==========
     for ip in range(NP):
-        x = np.array([px[:, ip]]).T
-        w = pw[0, ip]
+        x = np.array([px[:, ip]]).T  # Get particle state
+        w = pw[0, ip]  # Get particle weight
 
-        # Predict with random input sampling
-        ud1 = u[0, 0] + np.random.randn() * R[0, 0] ** 0.5
-        ud2 = u[1, 0] + np.random.randn() * R[1, 1] ** 0.5
+        # Prediction: Add process noise to control input (motion model uncertainty)
+        ud1 = u[0, 0] + np.random.randn() * R[0, 0] ** 0.5  # Noisy velocity
+        ud2 = u[1, 0] + np.random.randn() * R[1, 1] ** 0.5  # Noisy yaw rate
         ud = np.array([[ud1, ud2]]).T
-        x = motion_model(x, ud)
+        x = motion_model(x, ud)  # Propagate particle
 
-        # Calc Importance Weight
+        # Update: Calculate importance weight based on observations
        for i in range(len(z[:, 0])):
-            dx = x[0, 0] - z[i, 1]
-            dy = x[1, 0] - z[i, 2]
-            pre_z = math.hypot(dx, dy)
+            # Predict observation: distance from particle to landmark
+            dx = x[0, 0] - z[i, 1]  # x difference to landmark
+            dy = x[1, 0] - z[i, 2]  # y difference to landmark
+            pre_z = math.hypot(dx, dy)  # Predicted range
+
+            # Innovation: difference between predicted and actual observation
            dz = pre_z - z[i, 0]
+
+            # Update weight by multiplying likelihoods from all observations
             w = w * gauss_likelihood(dz, math.sqrt(Q[0, 0]))
 
+        # Store updated particle state and weight
         px[:, ip] = x[:, 0]
         pw[0, ip] = w
 
-    pw = pw / pw.sum()  # normalize
+    # ========== Normalization ==========
+    pw = pw / pw.sum()  # Normalize weights to sum to 1.0
 
-    x_est = px.dot(pw.T)
-    p_est = calc_covariance(x_est, px, pw)
+    # ========== State Estimation ==========
+    x_est = px.dot(pw.T)  # Weighted mean (expected value)
+    p_est = calc_covariance(x_est, px, pw)  # Weighted covariance
 
-    N_eff = 1.0 / (pw.dot(pw.T))[0, 0]  # Effective particle number
+    # ========== Resampling Step ==========
+    # Calculate effective particle number: N_eff = 1 / sum(w_i^2)
+    # If N_eff is low, particles are degenerating (few particles have significant weight)
+    N_eff = 1.0 / (pw.dot(pw.T))[0, 0]
     if N_eff < NTh:
         px, pw = re_sampling(px, pw)
+
     return x_est, p_est, px, pw
 
 
 def re_sampling(px, pw):
     """
-    low variance re-sampling
+    Low Variance Resampling (Systematic Resampling).
+
+    This is a more efficient resampling method than naive multinomial resampling.
+    It reduces the variance of the resampling process by using stratified sampling.
+
+    Algorithm:
+    1. Create cumulative sum of weights (CDF)
+    2. Generate equally-spaced points with random offset
+    3. For each point, find corresponding particle in CDF
+    4. Sample particles proportional to their weights
+
+    Advantages over multinomial resampling:
+    - Lower variance: particles with similar weights are selected more consistently
+    - Computational efficiency: O(N) instead of O(N log N)
+    - Better preservation of particle diversity
+
+    Args:
+        px: Particle states [4xNP]
+        pw: Particle weights [1xNP]
+
+    Returns:
+        px: Resampled particle states
+        pw: Reset weights (uniform: 1/NP for all particles)
     """
+    # Create cumulative distribution function (CDF) from weights
     w_cum = np.cumsum(pw)
-    base = np.arange(0.0, 1.0, 1 / NP)
-    re_sample_id = base + np.random.uniform(0, 1 / NP)
+
+    # Generate systematic sample points: evenly spaced with random offset
+    base = np.arange(0.0, 1.0, 1 / NP)  # Evenly spaced points [0, 1/N, 2/N, ...]
+    re_sample_id = base + np.random.uniform(0, 1 / NP)  # Add random offset [0, 1/N)
+
+    # Select particles based on CDF
     indexes = []
     ind = 0
     for ip in range(NP):
+        # Find first particle whose CDF value exceeds the sample point
         while re_sample_id[ip] > w_cum[ind]:
             ind += 1
         indexes.append(ind)
 
+    # Create new particle set from selected indices
     px = px[:, indexes]
-    pw = np.zeros((1, NP)) + 1.0 / NP  # init weight
+    pw = np.zeros((1, NP)) + 1.0 / NP  # Reset to uniform weights
 
     return px, pw
 
 
 def plot_covariance_ellipse(x_est, p_est):  # pragma: no cover
+    """
+    Plot covariance ellipse representing uncertainty in position estimate.
+
+    The ellipse is drawn using eigenvalue decomposition of the position covariance:
+    - Eigenvalues determine the size (length of semi-axes)
+    - Eigenvectors determine the orientation
+
+    This visualizes the 1-sigma uncertainty region in x-y position.
+
+    Args:
+        x_est: Estimated state [4x1]
+        p_est: Estimated covariance matrix [4x4]
+    """
+    # Extract x-y position covariance
     p_xy = p_est[0:2, 0:2]
+
+    # Eigenvalue decomposition to find ellipse parameters
     eig_val, eig_vec = np.linalg.eig(p_xy)
 
+    # Determine major and minor axes
     if eig_val[0] >= eig_val[1]:
         big_ind = 0
         small_ind = 1
@@ -175,87 +362,119 @@ def plot_covariance_ellipse(x_est, p_est):  # pragma: no cover
         big_ind = 1
         small_ind = 0
 
+    # Parameter for ellipse boundary
     t = np.arange(0, 2 * math.pi + 0.1, 0.1)
 
-    # eig_val[big_ind] or eiq_val[small_ind] were occasionally negative
-    # numbers extremely close to 0 (~10^-20), catch these cases and set the
-    # respective variable to 0
+    # Handle numerical errors: eigenvalues should be positive but can be
+    # slightly negative (~10^-20) due to numerical precision
     try:
-        a = math.sqrt(eig_val[big_ind])
+        a = math.sqrt(eig_val[big_ind])  # Semi-major axis
     except ValueError:
         a = 0
 
     try:
-        b = math.sqrt(eig_val[small_ind])
+        b = math.sqrt(eig_val[small_ind])  # Semi-minor axis
     except ValueError:
         b = 0
 
+    # Generate ellipse points in canonical form
     x = [a * math.cos(it) for it in t]
     y = [b * math.sin(it) for it in t]
+
+    # Rotate ellipse to align with covariance eigenvectors
     angle = math.atan2(eig_vec[1, big_ind], eig_vec[0, big_ind])
     fx = rot_mat_2d(angle) @ np.array([[x, y]])
+
+    # Translate ellipse to estimated position
     px = np.array(fx[:, 0] + x_est[0, 0]).flatten()
     py = np.array(fx[:, 1] + x_est[1, 0]).flatten()
     plt.plot(px, py, "--r")
 
 
 def main():
+    """
+    Main simulation loop for particle filter localization.
+
+    Simulation scenario:
+    - Robot moves in 2D space with noisy velocity and yaw rate
+    - RFID landmarks provide noisy range measurements
+    - Three estimators are compared:
+        1. Ground truth (perfect motion)
+        2. Dead reckoning (noisy odometry only)
+        3. Particle filter (fusion of odometry and range measurements)
+    """
     print(__file__ + " start!!")
 
     time = 0.0
 
-    # RF_ID positions [x, y]
+    # RFID landmark positions [x, y] in global frame
+    # These are known landmark positions used for localization
     rf_id = np.array([[10.0, 0.0],
                       [10.0, 10.0],
                       [0.0, 15.0],
                       [-5.0, 20.0]])
 
-    # State Vector [x y yaw v]'
-    x_est = np.zeros((4, 1))
-    x_true = np.zeros((4, 1))
+    # Initialize state vectors [x, y, yaw, v]^T
+    x_est = np.zeros((4, 1))  # Particle filter estimate
+    x_true = np.zeros((4, 1))  # Ground truth state
+    x_dr = np.zeros((4, 1))  # Dead reckoning estimate
 
-    px = np.zeros((4, NP))  # Particle store
-    pw = np.zeros((1, NP)) + 1.0 / NP  # Particle weight
-    x_dr = np.zeros((4, 1))  # Dead reckoning
+    # Initialize particle filter
+    px = np.zeros((4, NP))  # Particle states: each column is one particle
+    pw = np.zeros((1, NP)) + 1.0 / NP  # Uniform initial weights
 
-    # history
+    # History for plotting trajectories
     h_x_est = x_est
     h_x_true = x_true
     h_x_dr = x_true
 
+    # Main simulation loop
     while SIM_TIME >= time:
         time += DT
-        u = calc_input()
+        u = calc_input()  # Get control input
 
+        # Simulate one time step
         x_true, z, x_dr, ud = observation(x_true, x_dr, u, rf_id)
 
+        # Run particle filter localization
        x_est, PEst, px, pw = pf_localization(px, pw, z, ud)
 
-        # store data history
        h_x_est = np.hstack((h_x_est, x_est))
+        # Store trajectory history
+        h_x_est = np.hstack((h_x_est, x_est))
         h_x_dr = np.hstack((h_x_dr, x_dr))
         h_x_true = np.hstack((h_x_true, x_true))
 
         if show_animation:
             plt.cla()
-            # for stopping simulation with the esc key.
+            # Enable stopping simulation with the ESC key
             plt.gcf().canvas.mpl_connect(
                 'key_release_event',
                 lambda event: [exit(0) if event.key == 'escape' else None])
 
+            # Plot observations (lines from robot to observed landmarks)
            for i in range(len(z[:, 0])):
                 plt.plot([x_true[0, 0], z[i, 1]],
                          [x_true[1, 0], z[i, 2]], "-k")
-            plt.plot(rf_id[:, 0], rf_id[:, 1], "*k")
-            plt.plot(px[0, :], px[1, :], ".r")
+
+            # Plot landmarks
+            plt.plot(rf_id[:, 0], rf_id[:, 1], "*k", markersize=10, label="Landmarks")
+
+            # Plot particles (red dots show particle distribution)
+            plt.plot(px[0, :], px[1, :], ".r", alpha=0.5, label="Particles")
+
+            # Plot trajectories
             plt.plot(np.array(h_x_true[0, :]).flatten(),
-                     np.array(h_x_true[1, :]).flatten(), "-b")
+                     np.array(h_x_true[1, :]).flatten(), "-b", label="Ground Truth")
             plt.plot(np.array(h_x_dr[0, :]).flatten(),
-                     np.array(h_x_dr[1, :]).flatten(), "-k")
+                     np.array(h_x_dr[1, :]).flatten(), "-k", label="Dead Reckoning")
             plt.plot(np.array(h_x_est[0, :]).flatten(),
-                     np.array(h_x_est[1, :]).flatten(), "-r")
+                     np.array(h_x_est[1, :]).flatten(), "-r", label="Particle Filter")
+
+            # Plot uncertainty ellipse
             plot_covariance_ellipse(x_est, PEst)
+
             plt.axis("equal")
             plt.grid(True)
+            plt.legend()
             plt.pause(0.001)
diff --git a/docs/modules/2_localization/particle_filter_localization/particle_filter_localization_main.rst b/docs/modules/2_localization/particle_filter_localization/particle_filter_localization_main.rst
index d648d8e080..0bb4959b56 100644
--- a/docs/modules/2_localization/particle_filter_localization/particle_filter_localization_main.rst
+++ b/docs/modules/2_localization/particle_filter_localization/particle_filter_localization_main.rst
@@ -1,19 +1,37 @@
-Particle filter localization
+Particle Filter Localization
 ----------------------------
 
 .. image:: https://github.com/AtsushiSakai/PythonRoboticsGifs/raw/master/Localization/particle_filter/animation.gif
 
-This is a sensor fusion localization with Particle Filter(PF).
+This is a sensor fusion localization with Particle Filter (PF), also known as Monte Carlo Localization (MCL).
 
-The blue line is true trajectory, the black line is dead reckoning
-trajectory,
+The blue line is the true trajectory, the black line is the dead reckoning trajectory,
+and the red line is the estimated trajectory with PF. The red dots represent individual particles,
+and the red dashed ellipse shows the estimation uncertainty.
 
-and the red line is estimated trajectory with PF.
+The robot measures distances to RFID landmarks, and these measurements are fused with
+odometry data using the particle filter algorithm.
 
-It is assumed that the robot can measure a distance from landmarks
-(RFID).
+Overview
+~~~~~~~~~~~~~
+
+The Particle Filter is a non-parametric implementation of the Bayes filter that represents
+the belief (probability distribution) of the robot's state using a set of weighted samples (particles).
+Unlike parametric filters (e.g., Kalman Filter), it can represent arbitrary probability distributions,
+including multi-modal distributions.
+
+**Key advantages:**
-This measurements are used for PF localization.
+- Can handle non-linear motion and observation models
+- Can represent multi-modal distributions (e.g., ambiguous localization)
+- No assumptions about Gaussian distributions
+- Relatively simple to implement
+
+**Key disadvantages:**
+
+- Computationally expensive (scales with number of particles)
+- Can suffer from particle degeneracy
+- Requires many particles in high-dimensional state spaces
 
 Code Link
 ~~~~~~~~~~~~~
@@ -21,23 +39,300 @@ Code Link
 
 .. autofunction:: Localization.particle_filter.particle_filter.pf_localization
 
-How to calculate covariance matrix from particles
+Particle Filter Algorithm
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The particle filter algorithm consists of three main steps:
+
+1. **Prediction Step (Motion Update)**
+2. **Update Step (Measurement Update)**
+3. **Resampling Step**
+
+Algorithm Pseudocode
+^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: text
+
+    For each particle i = 1 to N:
+        1. Sample noisy control: u_i ~ N(u_t, R)
+        2. Predict: x_i = f(x_{i,t-1}, u_i)
+        3. For each observation z_j:
+        4.     w_i *= p(z_j | x_i)
+    5. Normalize weights: w_i = w_i / sum(w_i)
+    6. Estimate: x_est = sum(w_i * x_i)
+    7. If N_eff < threshold:
+    8.     Resample particles
+
+Mathematical Formulation
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+**State Representation:**
+
+The robot state at time :math:`t` is:
+
+.. math:: \mathbf{x}_t = [x_t, y_t, \theta_t, v_t]^T
+
+where :math:`x_t, y_t` are the 2D position coordinates, :math:`\theta_t` is the orientation (yaw), and :math:`v_t` is the velocity.
+
+**Particle Set:**
+
+The belief is represented by :math:`N` particles:
+
+.. math:: \mathcal{X}_t = \{x_t^{[1]}, x_t^{[2]}, ..., x_t^{[N]}\}
+
+with corresponding weights:
+
+.. math:: \mathcal{W}_t = \{w_t^{[1]}, w_t^{[2]}, ..., w_t^{[N]}\}, \quad \sum_{i=1}^N w_t^{[i]} = 1
+
+
+Motion Model
+~~~~~~~~~~~~
+
+The robot uses a simple kinematic model:
+
+.. math::
+
+    x_{t+1} = x_t + v_t \cos(\theta_t) \Delta t
+
+.. math::
+
+    y_{t+1} = y_t + v_t \sin(\theta_t) \Delta t
+
+.. math::
+
+    \theta_{t+1} = \theta_t + \omega_t \Delta t
+
+.. math::
+
+    v_{t+1} = v_t
+
+where :math:`\mathbf{u}_t = [v_t, \omega_t]^T` is the control input (velocity and yaw rate).
+Note that :math:`v_t` here is the commanded velocity: the velocity state is simply
+overwritten by the control input at each step, since the last row of :math:`F` below is zero.
+
+In matrix form:
+
+.. math:: \mathbf{x}_{t+1} = F\mathbf{x}_t + B\mathbf{u}_t
+
+where
+
+.. math::
+
+    F = \begin{bmatrix}
+    1 & 0 & 0 & 0\\
+    0 & 1 & 0 & 0\\
+    0 & 0 & 1 & 0 \\
+    0 & 0 & 0 & 0
+    \end{bmatrix}, \quad
+    B = \begin{bmatrix}
+    \cos(\theta_t) \Delta t & 0\\
+    \sin(\theta_t) \Delta t & 0\\
+    0 & \Delta t\\
+    1 & 0
+    \end{bmatrix}
+
+**Process Noise:**
+
+The noisy control input :math:`\hat{\mathbf{u}}_t` used to propagate each particle is sampled as:
+
+.. math:: \hat{\mathbf{u}}_t \sim \mathcal{N}(\mathbf{u}_t, R)
+
+where :math:`R = \text{diag}(\sigma_v^2, \sigma_\omega^2)` is the process noise covariance.
+
+
+Observation Model
+~~~~~~~~~~~~~~~~~
+
+The robot observes ranges to RFID landmarks at known positions :math:`\mathbf{m}_j = [m_{j,x}, m_{j,y}]^T`.
+
+The predicted observation is:
+
+.. math:: \hat{z}_j = \sqrt{(x_t - m_{j,x})^2 + (y_t - m_{j,y})^2}
+
+**Observation Noise:**
+
+Range measurements have Gaussian noise:
+
+.. math:: z_j \sim \mathcal{N}(\hat{z}_j, \sigma_z^2)
+
+**Likelihood Function:**
+
+The observation likelihood for particle :math:`i` is:
+
+.. math:: p(z_j | x_t^{[i]}) = \frac{1}{\sqrt{2\pi\sigma_z^2}} \exp\left(-\frac{(z_j - \hat{z}_j^{[i]})^2}{2\sigma_z^2}\right)
+
+For multiple observations, likelihoods are multiplied (assuming independence):
+
+.. math:: w_t^{[i]} = w_{t-1}^{[i]} \prod_{j=1}^{M} p(z_j | x_t^{[i]})
+
+
+Resampling
+~~~~~~~~~~
+
+**Particle Degeneracy Problem:**
+
+Over time, most particles have negligible weights, leading to computational waste.
+This is detected using the effective particle number:
+
+.. math:: N_{eff} = \frac{1}{\sum_{i=1}^N (w_t^{[i]})^2}
+
+When :math:`N_{eff} < N_{threshold}`, resampling is triggered.
+
+**Low Variance Resampling (Systematic Resampling):**
+
+This algorithm samples particles proportional to their weights with low variance:
+
+1. Compute cumulative weight distribution: :math:`c^{[i]} = \sum_{j=1}^{i} w^{[j]}`
+2. Generate systematic samples: :math:`r^{[i]} = \frac{i-1}{N} + U` where :math:`U \sim \text{Uniform}(0, 1/N)`
+3. For each :math:`r^{[i]}`, find particle :math:`j` such that :math:`c^{[j-1]} < r^{[i]} \leq c^{[j]}`
+4. Reset weights to uniform: :math:`w^{[i]} = 1/N`
+
+
+How to Calculate Covariance Matrix from Particles
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The covariance matrix :math:`\Xi` from particle information is calculated by the following equation:
+The state estimate is the weighted mean:
+
+.. math:: \hat{\mathbf{x}}_t = \sum_{i=1}^N w_t^{[i]} x_t^{[i]}
+
+The covariance matrix :math:`\Xi` is calculated using weighted sample covariance with Bessel's correction:
 
 .. math:: \Xi_{j,k}=\frac{1}{1-\sum^N_{i=1}(w^i)^2}\sum^N_{i=1}w^i(x^i_j-\mu_j)(x^i_k-\mu_k)
 
-- :math:`\Xi_{j,k}` is covariance matrix element at row :math:`i` and column :math:`k`.
+where:
+
+- :math:`\Xi_{j,k}` is the covariance matrix element at row :math:`j` and column :math:`k`
+- :math:`w^i` is the weight of the :math:`i`-th particle
+- :math:`x^i_j` is the :math:`j`-th state component of the :math:`i`-th particle
+- :math:`\mu_j` is the :math:`j`-th component of the mean state
+
+The factor :math:`(1-\sum^N_{i=1}(w^i)^2)^{-1}` is the correction for weighted samples,
+which accounts for the effective sample size.
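+
+As a concrete reference, the weighted mean, the corrected covariance, and the
+effective particle number can all be computed directly from the particle arrays.
+The sketch below is illustrative rather than the repository code: the function name
+``estimate_from_particles`` and the ``(D, N)`` / ``(1, N)`` array shapes are
+assumptions for this example, while ``calc_covariance`` in ``particle_filter.py``
+computes the same quantity with an explicit per-particle loop.
+
+.. code-block:: python
+
+    import numpy as np
+
+    def estimate_from_particles(px, pw):
+        # px: particle states (D, N); pw: weights (1, N) summing to 1 (assumed shapes)
+        x_est = px @ pw.T               # weighted mean, shape (D, 1)
+        dx = px - x_est                 # deviations from the mean, shape (D, N)
+        cov = (pw * dx) @ dx.T          # sum_i w_i * dx_i * dx_i^T
+        cov /= 1.0 - float(pw @ pw.T)   # correction factor for weighted samples
+        n_eff = 1.0 / float(pw @ pw.T)  # N_eff = 1 / sum_i w_i^2
+        return x_est, cov, n_eff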
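+
+The resampling steps above likewise map onto a few lines of NumPy. Again a sketch
+under the same assumed shapes, not the repository code: ``re_sampling`` in
+``particle_filter.py`` implements the identical algorithm with an explicit search loop.
+
+.. code-block:: python
+
+    import numpy as np
+
+    def low_variance_resampling(px, pw):
+        # px: particle states (D, N); pw: weights (1, N) summing to 1 (assumed shapes)
+        n = px.shape[1]
+        w_cum = np.cumsum(pw)                               # CDF of the weights
+        r = np.arange(n) / n + np.random.uniform(0, 1 / n)  # r[i] = i/N + U, U ~ Uniform(0, 1/N)
+        indexes = np.searchsorted(w_cum, r)                 # first j with c[j-1] < r[i] <= c[j]
+        return px[:, indexes], np.full((1, n), 1.0 / n)     # resampled states, uniform weights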
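+
+Putting the pieces together, one full predict-weight-estimate-resample cycle can be
+exercised in isolation. The following is a self-contained toy, not the sample's
+configuration: the landmark layout, noise levels, and measured ranges are arbitrary,
+and the particle carries a reduced 3-component state ``[x, y, yaw]`` instead of the
+sample's 4-component ``[x, y, yaw, v]``.
+
+.. code-block:: python
+
+    import numpy as np
+
+    rng = np.random.default_rng(0)
+
+    N = 100                                       # number of particles
+    dt = 0.1                                      # time step [s]
+    landmarks = np.array([[10.0, 0.0], [0.0, 15.0]])
+    sigma_v, sigma_w = 1.0, np.deg2rad(30.0)      # control noise std (v, yaw_rate)
+    sigma_z = 0.2                                 # range noise std [m]
+
+    px = np.zeros((3, N))                         # particles: [x, y, yaw] per column
+    pw = np.full((1, N), 1.0 / N)                 # uniform initial weights
+    u = np.array([1.0, 0.1])                      # control input [v, yaw_rate]
+    z = np.array([10.0, 15.0])                    # measured ranges to the landmarks
+
+    # 1. Prediction: propagate every particle with sampled control noise
+    v = u[0] + rng.normal(0.0, sigma_v, N)
+    w = u[1] + rng.normal(0.0, sigma_w, N)
+    px[0] += v * np.cos(px[2]) * dt
+    px[1] += v * np.sin(px[2]) * dt
+    px[2] += w * dt
+
+    # 2. Update: multiply the Gaussian range likelihood over all observations
+    for (lx, ly), r in zip(landmarks, z):
+        d_hat = np.hypot(px[0] - lx, px[1] - ly)  # predicted range per particle
+        lik = np.exp(-(d_hat - r) ** 2 / (2 * sigma_z ** 2))
+        pw *= lik / np.sqrt(2 * np.pi * sigma_z ** 2)
+    pw /= pw.sum()                                # normalize the weights
+
+    # 3. Estimate, then resample if the effective particle number is low
+    x_est = px @ pw.T                             # weighted mean, shape (3, 1)
+    n_eff = 1.0 / float(pw @ pw.T)
+    if n_eff < N / 2:                             # same threshold as NTh = NP / 2
+        r_pts = np.arange(N) / N + rng.uniform(0, 1 / N)
+        idx = np.searchsorted(np.cumsum(pw), r_pts)
+        px, pw = px[:, idx], np.full((1, N), 1.0 / N)
+
+    print("estimate:", x_est.ravel(), "N_eff:", n_eff)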
+
+
+Comparison with Other Localization Methods
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Extended Kalman Filter (EKF)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**Similarities:**
+
+- Both use Bayes filter framework (predict and update)
+- Both handle sensor fusion (odometry + observations)
+- Both estimate mean and covariance
+
+**Differences:**
+
++------------------------+--------------------------------+--------------------------------+
+| Feature                | Particle Filter                | Extended Kalman Filter         |
++========================+================================+================================+
+| Distribution           | Non-parametric (particles)     | Parametric (Gaussian)          |
++------------------------+--------------------------------+--------------------------------+
+| Linearity              | Handles non-linear models      | Requires linearization         |
++------------------------+--------------------------------+--------------------------------+
+| Multi-modal            | Yes (can represent multiple    | No (unimodal Gaussian only)    |
+|                        | hypotheses)                    |                                |
++------------------------+--------------------------------+--------------------------------+
+| Computational cost     | High (O(N) particles)          | Low (fixed, independent of     |
+|                        |                                | uncertainty)                   |
++------------------------+--------------------------------+--------------------------------+
+| Accuracy               | Converges to optimal with      | Optimal only for linear        |
+|                        | infinite particles             | Gaussian systems               |
++------------------------+--------------------------------+--------------------------------+
+| Initial uncertainty    | Can handle unknown initial     | Requires good initial          |
+|                        | position (global localization) | estimate                       |
++------------------------+--------------------------------+--------------------------------+
+
+**When to use:**
+
+- **Particle Filter**: Global localization, kidnapped robot problem, non-Gaussian noise, multi-modal distributions
+- **EKF**: Position tracking, Gaussian noise, computational efficiency required
+
+
+Unscented Kalman Filter (UKF)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**Similarities:**
+
+- Both handle non-linear systems without linearization
+- Both can be applied to strongly non-linear motion/observation models
+
+**Differences:**
+
++------------------------+--------------------------------+--------------------------------+
+| Feature                | Particle Filter                | Unscented Kalman Filter        |
++========================+================================+================================+
+| Non-linearity handling | Asymptotically exact (sampling)| Approximation (sigma points)   |
++------------------------+--------------------------------+--------------------------------+
+| Computational cost     | High (many particles)          | Moderate (fixed sigma points)  |
++------------------------+--------------------------------+--------------------------------+
+| Multi-modal            | Yes                            | No (unimodal Gaussian)         |
++------------------------+--------------------------------+--------------------------------+
+| Implementation         | Simple concept                 | More complex mathematics       |
++------------------------+--------------------------------+--------------------------------+
+
+**When to use:**
+
+- **Particle Filter**: Extreme non-linearity, multi-modal distributions, global localization
+- **UKF**: Moderate non-linearity, computational efficiency, tracking with good initialization
+
+
+Histogram Filter
+^^^^^^^^^^^^^^^^
+
+**Similarities:**
+
+- Both are non-parametric
+- Both can represent multi-modal distributions
+
+**Differences:**
+
++------------------------+--------------------------------+--------------------------------+
+| Feature                | Particle Filter                | Histogram Filter               |
++========================+================================+================================+
+| Representation         | Particles (discrete samples)   | Grid cells (discrete bins)     |
++------------------------+--------------------------------+--------------------------------+
+| Dimensionality         | Scales better to high          | Suffers from curse of          |
+|                        | dimensions                     | dimensionality (grid grows     |
+|                        |                                | exponentially)                 |
++------------------------+--------------------------------+--------------------------------+
+| Resolution             | Adaptive (particles            | Fixed grid resolution          |
+|                        | concentrate in high-           |                                |
+|                        | probability regions)           |                                |
++------------------------+--------------------------------+--------------------------------+
+| Memory                 | O(N) particles                 | O(cells^dimensions)            |
++------------------------+--------------------------------+--------------------------------+
+
+**When to use:**
+
+- **Particle Filter**: Higher dimensional state spaces (>2D), dynamic resolution needs
+- **Histogram Filter**: Low-dimensional problems (1D or 2D), simple implementation
+
+
+Summary Table
+^^^^^^^^^^^^^
-- :math:`w^i` is the weight of the :math:`i` th particle.
++-------------------+---------+-------------+-------------+------------+----------------+
+| Method            | Linear  | Multi-modal | Comp. Cost  | Global Loc | Best Use Case  |
++===================+=========+=============+=============+============+================+
+| Particle Filter   | No      | Yes         | High        | Yes        | Robust,        |
+|                   |         |             |             |            | global         |
++-------------------+---------+-------------+-------------+------------+----------------+
+| EKF               | Yes*    | No          | Low         | No         | Tracking,      |
+|                   |         |             |             |            | efficient      |
++-------------------+---------+-------------+-------------+------------+----------------+
+| UKF               | No      | No          | Medium      | No         | Non-linear     |
+|                   |         |             |             |            | tracking       |
++-------------------+---------+-------------+-------------+------------+----------------+
+| Histogram         | No      | Yes         | Medium-High | Yes        | Low-dim        |
+|                   |         |             |             |            | global loc     |
++-------------------+---------+-------------+-------------+------------+----------------+
-- :math:`x^i_j` is the :math:`j` th state of the :math:`i` th particle.
+\* EKF requires linearization
-- :math:`\mu_j` is the :math:`j` th mean state of particles.
 
 Reference
 ~~~~~~~~~~~
 
-- `_PROBABILISTIC ROBOTICS: `_
+- `PROBABILISTIC ROBOTICS <http://www.probabilistic-robotics.org/>`_ - Chapters 4 (Bayes Filters) and 8 (MCL)
 - `Improving the particle filter in high dimensions using conjugate artificial process noise `_
+- S. Thrun, W. Burgard, D. Fox, "Probabilistic Robotics", MIT Press, 2005