diff --git a/research/mi_lira_2021/README.md b/research/mi_lira_2021/README.md
index 9c025f0..72cd48f 100644
--- a/research/mi_lira_2021/README.md
+++ b/research/mi_lira_2021/README.md
@@ -94,15 +94,15 @@ which should give (something like) the following output
 
 ```
 Attack Ours (online)
-   AUC 0.6675, Accuracy 0.6074, TPR@0.1%FPR of 0.0104
+   AUC 0.6676, Accuracy 0.6077, TPR@0.1%FPR of 0.0169
 Attack Ours (online, fixed variance)
-   AUC 0.6831, Accuracy 0.6140, TPR@0.1%FPR of 0.0541
+   AUC 0.6856, Accuracy 0.6137, TPR@0.1%FPR of 0.0593
 Attack Ours (offline)
-   AUC 0.5465, Accuracy 0.5486, TPR@0.1%FPR of 0.0073
+   AUC 0.5488, Accuracy 0.5500, TPR@0.1%FPR of 0.0130
 Attack Ours (offline, fixed variance)
-   AUC 0.5518, Accuracy 0.5485, TPR@0.1%FPR of 0.0259
+   AUC 0.5549, Accuracy 0.5537, TPR@0.1%FPR of 0.0299
 Attack Global threshold
-   AUC 0.5900, Accuracy 0.6018, TPR@0.1%FPR of 0.0007
+   AUC 0.5921, Accuracy 0.6044, TPR@0.1%FPR of 0.0009
 ```
 
 where the global threshold attack is the baseline, and our online,
diff --git a/research/mi_lira_2021/fprtpr.png b/research/mi_lira_2021/fprtpr.png
index 5ad9dca..8419ca1 100644
Binary files a/research/mi_lira_2021/fprtpr.png and b/research/mi_lira_2021/fprtpr.png differ
diff --git a/research/mi_lira_2021/inference.py b/research/mi_lira_2021/inference.py
index fbe8170..9d78d0b 100644
--- a/research/mi_lira_2021/inference.py
+++ b/research/mi_lira_2021/inference.py
@@ -69,7 +69,7 @@ def main(argv):
             for dy in range(0, 2*shift+1, stride):
                 this_x = aug_pad[:, dx:dx+32, dy:dy+32, :].transpose((0,3,1,2))
 
-                logits = model.model(this_x, training=False)
+                logits = model.model(this_x, training=True)
                 outs.append(logits)
 
     print(np.array(outs).shape)
diff --git a/research/mi_poison_2022/README.md b/research/mi_poison_2022/README.md
index 5066046..a20444d 100644
--- a/research/mi_poison_2022/README.md
+++ b/research/mi_poison_2022/README.md
@@ -88,13 +88,13 @@ which should give (something like) the following output
 
 ```
 Attack No poison (LiRA)
-   AUC 0.6992, Accuracy 0.6240, TPR@0.1%FPR of 0.0529
+   AUC 0.7025, Accuracy 0.6258, TPR@0.1%FPR of 0.0544
 Attack No poison (Global threshold)
-   AUC 0.6200, Accuracy 0.6167, TPR@0.1%FPR of 0.0011
+   AUC 0.6191, Accuracy 0.6173, TPR@0.1%FPR of 0.0012
 Attack With poison (LiRA)
-   AUC 0.9904, Accuracy 0.9617, TPR@0.1%FPR of 0.3730
+   AUC 0.9943, Accuracy 0.9653, TPR@0.1%FPR of 0.4945
 Attack With poison (Global threshold)
-   AUC 0.9911, Accuracy 0.9580, TPR@0.1%FPR of 0.2130
+   AUC 0.9922, Accuracy 0.9603, TPR@0.1%FPR of 0.3930
 ```
 
 where the baselines are LiRA and a simple global threshold on the membership
diff --git a/research/mi_poison_2022/fprtpr.png b/research/mi_poison_2022/fprtpr.png
index 14fe1d0..a870cb9 100644
Binary files a/research/mi_poison_2022/fprtpr.png and b/research/mi_poison_2022/fprtpr.png differ
diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py
index 9c21131..988a50d 100644
--- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py
@@ -109,8 +109,7 @@ def _run_trained_attack(
           labels[train_indices],
           sample_weight=sample_weights_train,
       )
-      predictions = attacker.predict(features[test_indices])
-      scores[test_indices] = predictions
+      scores[test_indices] = attacker.predict(features[test_indices])
   except ValueError as ve:
     if 'cannot be greater than the number of members in each class.' in str(ve):
       logging.warning('kf.split in _run_trained_attack fails with: %s', str(ve))
@@ -200,8 +199,9 @@ def _run_threshold_attack(attack_input: AttackInputData):
       slice_spec=_get_slice_spec(attack_input),
       data_size=DataSize(ntrain=ntrain, ntest=ntest),
       attack_type=AttackType.THRESHOLD_ATTACK,
-      membership_scores_train=attack_input.get_loss_train(),
-      membership_scores_test=attack_input.get_loss_test(),
+      # Negate loss because training examples are expected to have lower loss.
+      membership_scores_train=-attack_input.get_loss_train(),
+      membership_scores_test=-attack_input.get_loss_test(),
       roc_curve=roc_curve,
       epsilon_lower_bound_value=epsilon_lower_bound_value,
   )
diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py
index f74a9ef..cf45cd5 100644
--- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py
@@ -185,19 +185,19 @@ class TrainedAttacker(object):
     raise NotImplementedError()
 
   def predict(self, input_features):
-    """Predicts whether input_features belongs to train or test.
+    """Predicts the probability that input_features belongs to train.
 
     Args:
       input_features : A vector of features with the same semantics as x_train
         passed to train_model.
 
     Returns:
-      An array of probabilities denoting whether the example belongs to test.
+      An array of probabilities that the examples belong to train.
     """
     if self.model is None:
       raise AssertionError(
           'Model not trained yet. Please call train_model first.')
-    return self.model.predict_proba(input_features)[:, 1]
+    return self.model.predict_proba(input_features)[:, 1]  # Train has label 1
 
 
 class LogisticRegressionAttacker(TrainedAttacker):
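For context on the "Ours (online, fixed variance)" rows updated in the READMEs above: LiRA scores each example by comparing the target model's logit-scaled confidence against Gaussians fit to shadow models trained with and without that example. The sketch below is a minimal illustration of that idea following the paper's description; it is not the code in `plot.py`, and the data and names here are made up.

```python
import numpy as np
from scipy.stats import norm


def lira_online_score(target_conf, in_confs, out_confs, fixed_std):
  """Log-likelihood ratio of the target confidence under IN vs. OUT Gaussians.

  Higher score = more evidence the example was a training member. In the
  "fixed variance" variant a single std, estimated globally, is shared by
  both Gaussians instead of per-example standard deviations.
  """
  return (norm.logpdf(target_conf, loc=np.mean(in_confs), scale=fixed_std)
          - norm.logpdf(target_conf, loc=np.mean(out_confs), scale=fixed_std))


# Toy usage with made-up shadow-model confidences for one example.
rng = np.random.default_rng(0)
in_confs = rng.normal(3.0, 1.0, size=8)    # shadow models trained WITH the example
out_confs = rng.normal(1.0, 1.0, size=8)   # shadow models trained WITHOUT it
print(lira_online_score(2.8, in_confs, out_confs, fixed_std=1.0))
```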
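The `_run_threshold_attack` hunk negates the loss so that membership scores follow the convention "higher score = more likely a training member"; raw loss points the other way, since training members tend to have lower loss. Below is a self-contained sketch with synthetic losses (scikit-learn metrics only, variable names are illustrative) showing the effect on AUC, and how a fixed-low-FPR number like the READMEs' TPR@0.1%FPR can be read off a ROC curve.

```python
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve

rng = np.random.default_rng(0)
loss_members = rng.normal(0.5, 0.3, size=5000)      # training members: lower loss
loss_nonmembers = rng.normal(1.0, 0.3, size=5000)   # non-members: higher loss
labels = np.concatenate([np.ones(5000), np.zeros(5000)])  # 1 = member

# Raw losses rank members below non-members, so AUC drops under 0.5;
# negating restores the "higher = more likely member" convention.
print(roc_auc_score(labels, np.concatenate([loss_members, loss_nonmembers])))
print(roc_auc_score(labels, np.concatenate([-loss_members, -loss_nonmembers])))

# TPR at a fixed low FPR (e.g. 0.1%) is read off the ROC curve of the scores.
fpr, tpr, _ = roc_curve(labels, np.concatenate([-loss_members, -loss_nonmembers]))
print("TPR@0.1%FPR:", np.interp(1e-3, fpr, tpr))
```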
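On the `TrainedAttacker.predict` docstring fix and the `# Train has label 1` comment: for scikit-learn classifiers, the columns of `predict_proba` follow `clf.classes_`, so with members labeled 1 the second column is the probability of belonging to the training set. A toy sketch under that assumption (data and names are illustrative only):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

features = np.array([[0.1], [0.3], [1.8], [2.1]])
is_member = np.array([0, 0, 1, 1])  # 1 = example was in the training set

clf = LogisticRegression().fit(features, is_member)
print(clf.classes_)                       # [0 1] -> column order of predict_proba
print(clf.predict_proba(features)[:, 1])  # probability of label 1, i.e. "train"
```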