Refactor Gaussian Naive Bayes functions for clarity

PRERITARYA · web-flow · commit 3266dfcff19b · 2026-06-26T07:32:12.000+05:30
diff --git a/machine_learning/gaussian_naive_bayes.py b/machine_learning/gaussian_naive_bayes.py
@@ -111,7 +111,7 @@ def compute_mean_variance(values: list[float]) -> tuple[float, float]:
 
     n = len(values)
     mean = sum(values) / n
-    variance = sum((x - mean) ** 2 for x in values) / n
+    variance = sum((value - mean) ** 2 for value in values) / n
     return mean, max(variance, 1e-9)
 
 
@@ -161,18 +161,20 @@ def train(
     return priors, summaries
 
 
-def gaussian_log_probability(x: float, mean: float, variance: float) -> float:
+def gaussian_log_probability(
+    feature_value: float, mean: float, variance: float
+) -> float:
     """
     Compute the log of the Gaussian probability density for a single value.
 
     Uses the formula:
-        log P(x | mean, var) = -0.5 * log(2 * pi * var)
-                               - 0.5 * ((x - mean)^2 / var)
+        log P(feature_value | mean, var) = -0.5 * log(2 * pi * var)
+                                           - 0.5 * ((feature_value - mean)^2 / var)
 
     Args:
-        x:        The observed value.
-        mean:     Mean of the Gaussian distribution.
-        variance: Variance of the Gaussian distribution (must be > 0).
+        feature_value: The observed feature value.
+        mean:          Mean of the Gaussian distribution.
+        variance:      Variance of the Gaussian distribution (must be > 0).
 
     Returns:
         Log probability density as a float.
@@ -191,7 +193,10 @@ def gaussian_log_probability(x: float, mean: float, variance: float) -> float:
     """
     if variance <= 0:
         raise ValueError("Variance must be positive.")
-    return -0.5 * math.log(2 * math.pi * variance) - 0.5 * ((x - mean) ** 2 / variance)
+    return (
+        -0.5 * math.log(2 * math.pi * variance)
+        - 0.5 * ((feature_value - mean) ** 2 / variance)
+    )
 
 
 def predict_single(
@@ -223,7 +228,9 @@ def predict_single(
 
     for class_label, feature_summaries in summaries.items():
         score = priors[class_label]
-        for feature_value, (mean, variance) in zip(feature_vector, feature_summaries):
+        for feature_value, (mean, variance) in zip(
+            feature_vector, feature_summaries
+        ):
             score += gaussian_log_probability(feature_value, mean, variance)
         if score > best_score:
             best_score = score
@@ -300,7 +307,9 @@ def accuracy(predictions: list[int], actual: list[int]) -> float:
     if not predictions:
         raise ValueError("Inputs must not be empty.")
     if len(predictions) != len(actual):
-        raise ValueError("Predictions and actual labels must have the same length.")
+        raise ValueError(
+            "Predictions and actual labels must have the same length."
+        )
     correct = sum(p == a for p, a in zip(predictions, actual))
     return correct / len(actual)