diff --git a/changes/feature9663 b/changes/feature9663
new file mode 100644
index 0000000000000000000000000000000000000000..c02e08d034277419a08fbe9089eae70e45bda848
--- /dev/null
+++ b/changes/feature9663
@@ -0,0 +1,5 @@
+  o Minor feature (performance):
+    - Improve the runtime speed of the ntor handshake by using an
+      optimized curve25519 basepoint scalarmult implementation from the
+      public-domain ed25519-donna by Andrew M. ("floodyberry"), based on
+      ideas by Adam Langley. Implements ticket 9663.
diff --git a/src/common/crypto.c b/src/common/crypto.c
index 934679e67a32e28c4772262cb07becb2beaae315..c9745dd0e6cb561262300bbb2d1ff46016c3ead1 100644
--- a/src/common/crypto.c
+++ b/src/common/crypto.c
@@ -25,6 +25,7 @@
 
 #define CRYPTO_PRIVATE
 #include "crypto.h"
+#include "crypto_curve25519.h"
 
 #if OPENSSL_VERSION_NUMBER < OPENSSL_V_SERIES(1,0,0)
 #error "We require OpenSSL >= 1.0.0"
@@ -305,6 +306,8 @@ crypto_early_init(void)
       return -1;
     if (crypto_init_siphash_key() < 0)
       return -1;
+
+    curve25519_init();
   }
   return 0;
 }
diff --git a/src/common/crypto_curve25519.c b/src/common/crypto_curve25519.c
index 5bb14b0d9506b001ff08d1e08a7469bf16e63fa5..80b0d889520b471ef54f5cf5e1d8bf3e20f79be3 100644
--- a/src/common/crypto_curve25519.c
+++ b/src/common/crypto_curve25519.c
@@ -14,6 +14,8 @@
 #include "util.h"
 #include "torlog.h"
 
+#include "ed25519/donna/ed25519_donna_tor.h"
+
 /* ==============================
    Part 1: wrap a suitable curve25519 implementation as curve25519_impl
    ============================== */
@@ -30,6 +32,10 @@ int curve25519_donna(uint8_t *mypublic,
 #endif
 #endif
 
+static void pick_curve25519_basepoint_impl(void);
+
+static int curve25519_use_ed = -1;
+
 STATIC int
 curve25519_impl(uint8_t *output, const uint8_t *secret,
                 const uint8_t *basepoint)
@@ -50,6 +56,34 @@ curve25519_impl(uint8_t *output, const uint8_t *secret,
   return r;
 }
 
+STATIC int
+curve25519_basepoint_impl(uint8_t *output, const uint8_t *secret)
+{
+  int r = 0;
+  if (PREDICT_UNLIKELY(curve25519_use_ed == -1)) {
+    pick_curve25519_basepoint_impl();
+  }
+
+  /* TODO: Someone should benchmark curved25519_scalarmult_basepoint versus
+   * an optimized NaCl build to see which should be used when compiled with
+   * NaCl available.  I suspected that the ed25519 optimization always wins.
+   */
+  if (PREDICT_LIKELY(curve25519_use_ed == 1)) {
+    curved25519_scalarmult_basepoint_donna(output, secret);
+    r = 0;
+  } else {
+    static const uint8_t basepoint[32] = {9};
+    r = curve25519_impl(output, secret, basepoint);
+  }
+  return r;
+}
+
+void
+curve25519_set_impl_params(int use_ed)
+{
+  curve25519_use_ed = use_ed;
+}
+
 /* ==============================
    Part 2: Wrap curve25519_impl with some convenience types and functions.
    ============================== */
@@ -113,9 +147,7 @@ void
 curve25519_public_key_generate(curve25519_public_key_t *key_out,
                                const curve25519_secret_key_t *seckey)
 {
-  static const uint8_t basepoint[32] = {9};
-
-  curve25519_impl(key_out->public_key, seckey->secret_key, basepoint);
+  curve25519_basepoint_impl(key_out->public_key, seckey->secret_key);
 }
 
 int
@@ -283,3 +315,84 @@ curve25519_handshake(uint8_t *output,
   curve25519_impl(output, skey->secret_key, pkey->public_key);
 }
 
+/** Check whether the ed25519-based curve25519 basepoint optimization seems to
+ * be working. If so, return 0; otherwise return -1. */
+static int
+curve25519_basepoint_spot_check(void)
+{
+  static const uint8_t alicesk[32] = {
+    0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d,
+    0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45,
+    0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a,
+    0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a
+  };
+  static const uint8_t alicepk[32] = {
+    0x85,0x20,0xf0,0x09,0x89,0x30,0xa7,0x54,
+    0x74,0x8b,0x7d,0xdc,0xb4,0x3e,0xf7,0x5a,
+    0x0d,0xbf,0x3a,0x0d,0x26,0x38,0x1a,0xf4,
+    0xeb,0xa4,0xa9,0x8e,0xaa,0x9b,0x4e,0x6a
+  };
+  const int loop_max=200;
+  int save_use_ed = curve25519_use_ed;
+  unsigned char e1[32] = { 5 };
+  unsigned char e2[32] = { 5 };
+  unsigned char x[32],y[32];
+  int i;
+  int r=0;
+
+  /* Check the most basic possible sanity via the test secret/public key pair
+   * used in "Cryptography in NaCl - 2. Secret keys and public keys".  This
+   * may catch catastrophic failures on systems where Curve25519 is expensive, 
+   * without requiring a ton of key generation.
+   */
+  curve25519_use_ed = 1;
+  r |= curve25519_basepoint_impl(x, alicesk);
+  if (fast_memneq(x, alicepk, 32))
+    goto fail;
+
+  /* Ok, the optimization appears to produce passable results, try a few more
+   * values, maybe there's something subtle wrong.
+   */
+  for (i = 0; i < loop_max; ++i) {
+    curve25519_use_ed = 0;
+    r |= curve25519_basepoint_impl(x, e1);
+    curve25519_use_ed = 1;
+    r |= curve25519_basepoint_impl(y, e2);
+    if (fast_memneq(x,y,32))
+      goto fail;
+    memcpy(e1, x, 32);
+    memcpy(e2, x, 32);
+  }
+
+  goto end;
+ fail:
+  r = -1;
+ end:
+  curve25519_use_ed = save_use_ed;
+  return r;
+}
+
+/** Choose whether to use the ed25519-based curve25519-basepoint
+ * implementation. */
+static void
+pick_curve25519_basepoint_impl(void)
+{
+  curve25519_use_ed = 1;
+
+  if (curve25519_basepoint_spot_check() == 0)
+    return;
+
+  log_warn(LD_CRYPTO, "The ed25519-based curve25519 basepoint "
+           "multiplication seems broken; using the curve25519 "
+           "implementation.");
+  curve25519_use_ed = 0;
+}
+
+/** Initialize the curve25519 implementations. This is necessary if you're
+ * going to use them in a multithreaded setting, and not otherwise. */
+void
+curve25519_init(void)
+{
+  pick_curve25519_basepoint_impl();
+}
+
diff --git a/src/common/crypto_curve25519.h b/src/common/crypto_curve25519.h
index 48e8a6d96285b1437da53c7b76cbcccffd0557a6..d53743986c4b442eb985262ea257ce4115c4ef29 100644
--- a/src/common/crypto_curve25519.h
+++ b/src/common/crypto_curve25519.h
@@ -61,6 +61,8 @@ int curve25519_rand_seckey_bytes(uint8_t *out, int extra_strong);
 #ifdef CRYPTO_CURVE25519_PRIVATE
 STATIC int curve25519_impl(uint8_t *output, const uint8_t *secret,
                            const uint8_t *basepoint);
+
+STATIC int curve25519_basepoint_impl(uint8_t *output, const uint8_t *secret);
 #endif
 
 #define CURVE25519_BASE64_PADDED_LEN 44
@@ -82,5 +84,8 @@ ssize_t crypto_read_tagged_contents_from_file(const char *fname,
                                               uint8_t *data_out,
                                               ssize_t data_out_len);
 
+void curve25519_set_impl_params(int use_ed);
+void curve25519_init(void);
+
 #endif
 
diff --git a/src/test/bench.c b/src/test/bench.c
index bc2b1f04d831790743fb86f56de9a621a2c706cf..dbff7d0262bdc08b10ca533b4915bb83ff12d909 100644
--- a/src/test/bench.c
+++ b/src/test/bench.c
@@ -177,7 +177,7 @@ bench_onion_TAP(void)
 }
 
 static void
-bench_onion_ntor(void)
+bench_onion_ntor_impl(void)
 {
   const int iters = 1<<10;
   int i;
@@ -234,6 +234,19 @@ bench_onion_ntor(void)
   dimap_free(keymap, NULL);
 }
 
+static void
+bench_onion_ntor(void)
+{
+  int ed;
+
+  for (ed = 0; ed <= 1; ++ed) {
+    printf("Ed25519-based basepoint multiply = %s.\n",
+           (ed == 0) ? "disabled" : "enabled");
+    curve25519_set_impl_params(ed);
+    bench_onion_ntor_impl();
+  }
+}
+
 static void
 bench_ed25519(void)
 {
diff --git a/src/test/test_crypto.c b/src/test/test_crypto.c
index 6cba850f30ecc8f7c55f279ae54211e9f463d44b..bc88248db093664826a3a33ee8dc546853eb2996 100644
--- a/src/test/test_crypto.c
+++ b/src/test/test_crypto.c
@@ -1124,6 +1124,29 @@ test_crypto_curve25519_impl(void *arg)
   tor_free(mem_op_hex_tmp);
 }
 
+static void
+test_crypto_curve25519_basepoint(void *arg)
+{
+  uint8_t secret[32];
+  uint8_t public1[32];
+  uint8_t public2[32];
+  const int iters = 2048;
+  int i;
+  (void) arg;
+
+  for (i = 0; i < iters; ++i) {
+    crypto_rand((char*)secret, 32);
+    curve25519_set_impl_params(1); /* Use optimization */
+    curve25519_basepoint_impl(public1, secret);
+    curve25519_set_impl_params(0); /* Disable optimization */
+    curve25519_basepoint_impl(public2, secret);
+    tt_mem_op(public1, OP_EQ, public2, 32);
+  }
+
+ done:
+  ;
+}
+
 static void
 test_crypto_curve25519_wrappers(void *arg)
 {
@@ -1733,6 +1756,8 @@ struct testcase_t crypto_tests[] = {
   { "hkdf_sha256", test_crypto_hkdf_sha256, 0, NULL, NULL },
   { "curve25519_impl", test_crypto_curve25519_impl, 0, NULL, NULL },
   { "curve25519_impl_hibit", test_crypto_curve25519_impl, 0, NULL, (void*)"y"},
+  { "curve25519_basepoint",
+    test_crypto_curve25519_basepoint, TT_FORK, NULL, NULL },
   { "curve25519_wrappers", test_crypto_curve25519_wrappers, 0, NULL, NULL },
   { "curve25519_encode", test_crypto_curve25519_encode, 0, NULL, NULL },
   { "curve25519_persist", test_crypto_curve25519_persist, 0, NULL, NULL },