/*
* Prime generation.
*/
#include
#include
#include "ssh.h"
#include "mpint.h"
#include "mpunsafe.h"
#include "sshkeygen.h"
/* ----------------------------------------------------------------------
* Standard probabilistic prime-generation algorithm:
*
* - get a number from our PrimeCandidateSource which will at least
* avoid being divisible by any prime under 2^16
*
* - perform the Miller-Rabin primality test enough times to
* ensure the probability of it being composite is 2^-80 or
* less
*
* - go back to square one if any M-R test fails.
*/
static PrimeGenerationContext *probprime_new_context(
const PrimeGenerationPolicy *policy)
{
PrimeGenerationContext *ctx = snew(PrimeGenerationContext);
ctx->vt = policy;
return ctx;
}
static void probprime_free_context(PrimeGenerationContext *ctx)
{
sfree(ctx);
}
static ProgressPhase probprime_add_progress_phase(
const PrimeGenerationPolicy *policy,
ProgressReceiver *prog, unsigned bits)
{
/*
* The density of primes near x is 1/(log x). When x is about 2^b,
* that's 1/(b log 2).
*
* But we're only doing the expensive part of the process (the M-R
* checks) for a number that passes the initial winnowing test of
* having no factor less than 2^16 (at least, unless the prime is
* so small that PrimeCandidateSource gives up on that winnowing).
* The density of _those_ numbers is about 1/19.76. So the odds of
* hitting a prime per expensive attempt are boosted by a factor
* of 19.76.
*/
const double log_2 = 0.693147180559945309417232121458;
double winnow_factor = (bits < 32 ? 1.0 : 19.76);
double prob = winnow_factor / (bits * log_2);
/*
* Estimate the cost of prime generation as the cost of the M-R
* modexps.
*/
double cost = (miller_rabin_checks_needed(bits) *
estimate_modexp_cost(bits));
return progress_add_probabilistic(prog, cost, prob);
}
static mp_int *probprime_generate(
PrimeGenerationContext *ctx,
PrimeCandidateSource *pcs, ProgressReceiver *prog)
{
pcs_ready(pcs);
while (true) {
progress_report_attempt(prog);
mp_int *p = pcs_generate(pcs);
if (!p) {
pcs_free(pcs);
return NULL;
}
MillerRabin *mr = miller_rabin_new(p);
bool known_bad = false;
unsigned nchecks = miller_rabin_checks_needed(mp_get_nbits(p));
for (unsigned check = 0; check < nchecks; check++) {
if (!miller_rabin_test_random(mr)) {
known_bad = true;
break;
}
}
miller_rabin_free(mr);
if (!known_bad) {
/*
* We have a prime!
*/
pcs_free(pcs);
return p;
}
mp_free(p);
}
}
static strbuf *null_mpu_certificate(PrimeGenerationContext *ctx, mp_int *p)
{
return NULL;
}
const PrimeGenerationPolicy primegen_probabilistic = {
probprime_add_progress_phase,
probprime_new_context,
probprime_free_context,
probprime_generate,
null_mpu_certificate,
};
/* ----------------------------------------------------------------------
* Alternative provable-prime algorithm, based on the following paper:
*
* [MAURER] Maurer, U.M. Fast generation of prime numbers and secure
* public-key cryptographic parameters. J. Cryptology 8, 123–155
* (1995). https://doi.org/10.1007/BF00202269
*/
typedef enum SubprimePolicy {
SPP_FAST,
SPP_MAURER_SIMPLE,
SPP_MAURER_COMPLEX,
} SubprimePolicy;
typedef struct ProvablePrimePolicyExtra {
SubprimePolicy spp;
} ProvablePrimePolicyExtra;
typedef struct ProvablePrimeContext ProvablePrimeContext;
struct ProvablePrimeContext {
Pockle *pockle;
PrimeGenerationContext pgc;
const ProvablePrimePolicyExtra *extra;
};
static PrimeGenerationContext *provableprime_new_context(
const PrimeGenerationPolicy *policy)
{
ProvablePrimeContext *ppc = snew(ProvablePrimeContext);
ppc->pgc.vt = policy;
ppc->pockle = pockle_new();
ppc->extra = policy->extra;
return &ppc->pgc;
}
static void provableprime_free_context(PrimeGenerationContext *ctx)
{
ProvablePrimeContext *ppc = container_of(ctx, ProvablePrimeContext, pgc);
pockle_free(ppc->pockle);
sfree(ppc);
}
static ProgressPhase provableprime_add_progress_phase(
const PrimeGenerationPolicy *policy,
ProgressReceiver *prog, unsigned bits)
{
/*
* Estimating the cost of making a _provable_ prime is difficult
* because of all the recursions to smaller sizes.
*
* Once you have enough factors of p-1 to certify primality of p,
* the remaining work in provable prime generation is not very
* different from probabilistic: you generate a random candidate,
* test its primality probabilistically, and use the witness value
* generated as a byproduct of that test for the full Pocklington
* verification. The expensive part, as usual, is made of modpows.
*
* The Pocklington test needs at least two modpows (one for the
* Fermat check, and one per known factor of p-1).
*
* The prior M-R step needs an unknown number, because we iterate
* until we find a value whose order is divisible by the largest
* power of 2 that divides p-1, say 2^j. That excludes half the
* possible witness values (specifically, the quadratic residues),
* so we expect to need on average two M-R operations to find one.
* But that's only if the number _is_ prime - as usual, it's also
* possible that we hit a non-prime and have to try again.
*
* So, if we were only estimating the cost of that final step, it
* would look a lot like the probabilistic version: we'd have to
* estimate the expected total number of modexps by knowing
* something about the density of primes among our candidate
* integers, and then multiply that by estimate_modexp_cost(bits).
* But the problem is that we also have to _find_ a smaller prime,
* so we have to recurse.
*
* In the MAURER_SIMPLE version of the algorithm, you recurse to
* any one of a range of possible smaller sizes i, each with
* probability proportional to 1/i. So your expected time to
* generate an n-bit prime is given by a horrible recurrence of
* the form E_n = S_n + (sum E_i/i) / (sum 1/i), in which S_n is
* the expected cost of the final step once you have your smaller
* primes, and both sums are over ceil(n/2) <= i <= n-20.
*
* At this point I ran out of effort to actually do the maths
* rigorously, so instead I did the empirical experiment of
* generating that sequence in Python and plotting it on a graph.
* My Python code is here, in case I need it again:
from math import log
alpha = log(3)/log(2) + 1 # exponent for modexp using Karatsuba mult
E = [1] * 16 # assume generating tiny primes is trivial
for n in range(len(E), 4096):
# Expected time for sub-generations, as a weighted mean of prior
# values of the same sequence.
lo = (n+1)//2
hi = n-20
if lo <= hi:
subrange = range(lo, hi+1)
num = sum(E[i]/i for i in subrange)
den = sum(1/i for i in subrange)
else:
num, den = 0, 1
# Constant term (cost of final step).
# Similar to probprime_add_progress_phase.
winnow_factor = 1 if n < 32 else 19.76
prob = winnow_factor / (n * log(2))
cost = 4 * n**alpha / prob
E.append(cost + num / den)
for i, p in enumerate(E):
try:
print(log(i), log(p))
except ValueError:
continue
* The output loop prints the logs of both i and E_i, so that when
* I plot the resulting data file in gnuplot I get a log-log
* diagram. That showed me some early noise and then a very
* straight-looking line; feeding the straight part of the graph
* to linear-regression analysis reported that it fits the line
*
* log E_n = -1.7901825337965498 + 3.6199197179662517 * log(n)
* => E_n = 0.16692969657466802 * n^3.6199197179662517
*
* So my somewhat empirical estimate is that Maurer prime
* generation costs about 0.167 * bits^3.62, in the same arbitrary
* time units used by estimate_modexp_cost.
*/
return progress_add_linear(prog, 0.167 * pow(bits, 3.62));
}
static mp_int *primegen_small(Pockle *pockle, PrimeCandidateSource *pcs)
{
assert(pcs_get_bits(pcs) <= 32);
pcs_ready(pcs);
while (true) {
mp_int *p = pcs_generate(pcs);
if (!p) {
pcs_free(pcs);
return NULL;
}
if (pockle_add_small_prime(pockle, p) == POCKLE_OK) {
pcs_free(pcs);
return p;
}
mp_free(p);
}
}
#ifdef DEBUG_PRIMEGEN
static void timestamp(FILE *fp)
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
fprintf(fp, "%lu.%09lu: ", (unsigned long)ts.tv_sec,
(unsigned long)ts.tv_nsec);
}
static PRINTF_LIKE(1, 2) void debug_f(const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
timestamp(stderr);
vfprintf(stderr, fmt, ap);
fputc('\n', stderr);
va_end(ap);
}
static void debug_f_mp(const char *fmt, mp_int *x, ...)
{
va_list ap;
va_start(ap, x);
timestamp(stderr);
vfprintf(stderr, fmt, ap);
mp_dump(stderr, "", x, "\n");
va_end(ap);
}
#else
#define debug_f(...) ((void)0)
#define debug_f_mp(...) ((void)0)
#endif
static double uniform_random_double(void)
{
unsigned char randbuf[8];
random_read(randbuf, 8);
return GET_64BIT_MSB_FIRST(randbuf) * 0x1-64;
}
static mp_int *mp_ceil_div(mp_int *n, mp_int *d)
{
mp_int *nplus = mp_add(n, d);
mp_sub_integer_into(nplus, nplus, 1);
mp_int *toret = mp_div(nplus, d);
mp_free(nplus);
return toret;
}
static mp_int *provableprime_generate_inner(
ProvablePrimeContext *ppc, PrimeCandidateSource *pcs,
ProgressReceiver *prog, double progress_origin, double progress_scale)
{
unsigned bits = pcs_get_bits(pcs);
assert(bits > 1);
if (bits <= 32) {
debug_f("ppgi(%u) -> small", bits);
return primegen_small(ppc->pockle, pcs);
}
unsigned min_bits_needed, max_bits_needed;
{
/*
* Find the product of all the prime factors we already know
* about.
*/
mp_int *size_got = mp_from_integer(1);
size_t nfactors;
mp_int **factors = pcs_get_known_prime_factors(pcs, &nfactors);
for (size_t i = 0; i < nfactors; i++) {
mp_int *to_free = size_got;
size_got = mp_unsafe_shrink(mp_mul(size_got, factors[i]));
mp_free(to_free);
}
/*
* Find the largest cofactor we might be able to use, and the
* smallest one we can get away with.
*/
mp_int *upperbound = pcs_get_upper_bound(pcs);
mp_int *size_needed = mp_nthroot(upperbound, 3, NULL);
debug_f_mp("upperbound = ", upperbound);
{
mp_int *to_free = upperbound;
upperbound = mp_unsafe_shrink(mp_div(upperbound, size_got));
mp_free(to_free);
}
debug_f_mp("size_needed = ", size_needed);
{
mp_int *to_free = size_needed;
size_needed = mp_unsafe_shrink(mp_ceil_div(size_needed, size_got));
mp_free(to_free);
}
max_bits_needed = pcs_get_bits_remaining(pcs);
/*
* We need a prime that is greater than or equal to
* 'size_needed' in order for the product of all our known
* factors of p-1 to exceed the cube root of the largest value
* p might take.
*
* Since pcs_new wants a size specified in bits, we must count
* the bits in size_needed and then add 1. Otherwise we might
* get a value with the same bit count as size_needed but
* slightly smaller than it.
*
* An exception is if size_needed = 1. In that case the
* product of existing known factors is _already_ enough, so
* we don't need to generate an extra factor at all.
*/
if (mp_hs_integer(size_needed, 2)) {
min_bits_needed = mp_get_nbits(size_needed) + 1;
} else {
min_bits_needed = 0;
}
mp_free(upperbound);
mp_free(size_needed);
mp_free(size_got);
}
double progress = 0.0;
if (min_bits_needed) {
debug_f("ppgi(%u) recursing, need [%u,%u] more bits",
bits, min_bits_needed, max_bits_needed);
unsigned *sizes = NULL;
size_t nsizes = 0, sizesize = 0;
unsigned real_min = max_bits_needed / 2;
unsigned real_max = (max_bits_needed >= 20 ?
max_bits_needed - 20 : 0);
if (real_min < min_bits_needed)
real_min = min_bits_needed;
if (real_max < real_min)
real_max = real_min;
debug_f("ppgi(%u) revised bits interval = [%u,%u]",
bits, real_min, real_max);
switch (ppc->extra->spp) {
case SPP_FAST:
/*
* Always pick the smallest subsidiary prime we can get
* away with: just over n/3 bits.
*
* This is not a good mode for cryptographic prime
* generation, because it skews the distribution of primes
* greatly, and worse, it skews them in a direction that
* heads away from the properties crypto algorithms tend
* to like.
*
* (For both discrete-log systems and RSA, people have
* tended to recommend in the past that p-1 should have a
* _large_ factor if possible. There's some disagreement
* on which algorithms this is really necessary for, but
* certainly I've never seen anyone recommend arranging a
* _small_ factor on purpose.)
*
* I originally implemented this mode because it was
* convenient for debugging - it wastes as little time as
* possible on finding a sub-prime and lets you get to the
* interesting part! And I leave it in the code because it
* might still be useful for _something_. Because it's
* cryptographically questionable, it's not selectable in
* the UI of either version of PuTTYgen proper; but it can
* be accessed through testcrypt, and if for some reason a
* definite prime is needed for non-crypto purposes, it
* may still be the fastest way to put your hands on one.
*/
debug_f("ppgi(%u) fast mode, just ask for %u bits",
bits, min_bits_needed);
sgrowarray(sizes, sizesize, nsizes);
sizes[nsizes++] = min_bits_needed;
break;
case SPP_MAURER_SIMPLE: {
/*
* Select the size of the subsidiary prime at random from
* sqrt(outputprime) up to outputprime/2^20, in such a way
* that the probability distribution matches that of the
* largest prime factor of a random n-bit number.
*
* Per [MAURER] section 3.4, the cumulative distribution
* function of this relative size is 1+log2(x), for x in
* [1/2,1]. You can generate a value from the distribution
* given by a cdf by applying the inverse cdf to a uniform
* value in [0,1]. Simplifying that in this case, what we
* have to do is raise 2 to the power of a random real
* number between -1 and 0. (And that gives you the number
* of _bits_ in the sub-prime, as a factor of the desired
* output number of bits.)
*
* We also require that the subsidiary prime q is at least
* 20 bits smaller than the output one, to give us a
* fighting chance of there being _any_ prime we can find
* such that q | p-1.
*
* (But these rules have to be applied in an order that
* still leaves us _some_ interval of possible sizes we
* can pick!)
*/
maurer_simple:
debug_f("ppgi(%u) Maurer simple mode", bits);
unsigned sub_bits;
do {
double uniform = uniform_random_double();
sub_bits = real_max * pow(2.0, uniform - 1) + 0.5;
debug_f(" ... %.6f -> %u?", uniform, sub_bits);
} while (!(real_min <= sub_bits && sub_bits <= real_max));
debug_f("ppgi(%u) asking for %u bits", bits, sub_bits);
sgrowarray(sizes, sizesize, nsizes);
sizes[nsizes++] = sub_bits;
break;
}
case SPP_MAURER_COMPLEX: {
/*
* In this mode, we may generate multiple factors of p-1
* which between them add up to at least n/2 bits, in such
* a way that those are guaranteed to be the largest
* factors of p-1 and that they have the same probability
* distribution as the largest k factors would have in a
* random integer. The idea is that this more elaborate
* procedure gets as close as possible to the same
* probability distribution you'd get by selecting a
* completely random prime (if you feasibly could).
*
* Algorithm from Appendix 1 of [MAURER]: we generate
* random real numbers that sum to at most 1, by choosing
* each one uniformly from the range [0, 1 - sum of all
* the previous ones]. We maintain them in a list in
* decreasing order, and we stop as soon as we find an
* initial subsequence of the list s_1,...,s_r such that
* s_1 + ... + s_{r-1} + 2 s_r > 1. In particular, this
* guarantees that the sum of that initial subsequence is
* at least 1/2, so we end up with enough factors to
* satisfy Pocklington.
*/
if (max_bits_needed / 2 + 1 > real_max) {
/* Early exit path in the case where this algorithm
* can't possibly generate a value in the range we
* need. In that situation, fall back to Maurer
* simple. */
debug_f("ppgi(%u) skipping GenerateSizeList, "
"real_max too small", bits);
goto maurer_simple; /* sorry! */
}
double *s = NULL;
size_t ns, ssize = 0;
while (true) {
debug_f("ppgi(%u) starting GenerateSizeList", bits);
ns = 0;
double range = 1.0;
while (true) {
/* Generate the next number */
double u = uniform_random_double() * range;
range -= u;
debug_f(" u_%"SIZEu" = %g", ns, u);
/* Insert it in the list */
sgrowarray(s, ssize, ns);
size_t i;
for (i = ns; i > 0 && s[i-1] < u; i--)
s[i] = s[i-1];
s[i] = u;
ns++;
debug_f(" inserting as s[%"SIZEu"]", i);
/* Look for a suitable initial subsequence */
double sum = 0;
for (i = 0; i < ns; i++) {
sum += s[i];
if (sum + s[i] > 1.0) {
debug_f(" s[0..%"SIZEu"] works!", i);
/* Truncate the sequence here, and stop
* generating random real numbers. */
ns = i+1;
goto got_list;
}
}
}
got_list:;
/*
* Now translate those real numbers into actual bit
* counts, and do a last-minute check to make sure
* their product is going to be in range.
*
* We have to check both the min and max sizes of the
* total. A b-bit number is in [2^{b-1},2^b). So the
* product of numbers of sizes b_1,...,b_k is at least
* 2^{\sum (b_i-1)}, and less than 2^{\sum b_i}.
*/
nsizes = 0;
unsigned min_total = 0, max_total = 0;
for (size_t i = 0; i < ns; i++) {
/* These sizes are measured in actual entropy, so
* add 1 bit each time to account for the
* zero-information leading 1 */
unsigned this_size = max_bits_needed * s[i] + 1;
debug_f(" bits[%"SIZEu"] = %u", i, this_size);
sgrowarray(sizes, sizesize, nsizes);
sizes[nsizes++] = this_size;
min_total += this_size - 1;
max_total += this_size;
}
debug_f(" total bits = [%u,%u)", min_total, max_total);
if (min_total < real_min || max_total > real_max+1) {
debug_f(" total out of range, try again");
} else {
debug_f(" success! %"SIZEu" sub-primes totalling [%u,%u) "
"bits", nsizes, min_total, max_total);
break;
}
}
smemclr(s, ssize * sizeof(*s));
sfree(s);
break;
}
default:
unreachable("bad subprime policy");
}
for (size_t i = 0; i < nsizes; i++) {
unsigned sub_bits = sizes[i];
double progress_in_this_prime = (double)sub_bits / bits;
mp_int *q = provableprime_generate_inner(
ppc, pcs_new(sub_bits),
prog, progress_origin + progress_scale * progress,
progress_scale * progress_in_this_prime);
progress += progress_in_this_prime;
assert(q);
debug_f_mp("ppgi(%u) got factor ", q, bits);
pcs_require_residue_1_mod_prime(pcs, q);
mp_free(q);
}
smemclr(sizes, sizesize * sizeof(*sizes));
sfree(sizes);
} else {
debug_f("ppgi(%u) no need to recurse", bits);
}
debug_f("ppgi(%u) ready, %u bits remaining",
bits, pcs_get_bits_remaining(pcs));
pcs_ready(pcs);
while (true) {
mp_int *p = pcs_generate(pcs);
if (!p) {
pcs_free(pcs);
return NULL;
}
debug_f_mp("provable_step p=", p);
MillerRabin *mr = miller_rabin_new(p);
debug_f("provable_step mr setup done");
mp_int *witness = miller_rabin_find_potential_primitive_root(mr);
miller_rabin_free(mr);
if (!witness) {
debug_f("provable_step mr failed");
mp_free(p);
continue;
}
size_t nfactors;
mp_int **factors = pcs_get_known_prime_factors(pcs, &nfactors);
PockleStatus st = pockle_add_prime(
ppc->pockle, p, factors, nfactors, witness);
if (st != POCKLE_OK) {
debug_f("provable_step proof failed %d", (int)st);
/*
* Check by assertion that the error status is not one of
* the ones we ought to have ruled out already by
* construction. If there's a bug in this code that means
* we can _never_ pass this test (e.g. picking products of
* factors that never quite reach cbrt(n)), we'd rather
* fail an assertion than loop forever.
*/
assert(st == POCKLE_DISCRIMINANT_IS_SQUARE ||
st == POCKLE_WITNESS_POWER_IS_1 ||
st == POCKLE_WITNESS_POWER_NOT_COPRIME);
mp_free(p);
if (witness)
mp_free(witness);
continue;
}
mp_free(witness);
pcs_free(pcs);
debug_f_mp("ppgi(%u) done, got ", p, bits);
progress_report(prog, progress_origin + progress_scale);
return p;
}
}
static mp_int *provableprime_generate(
PrimeGenerationContext *ctx,
PrimeCandidateSource *pcs, ProgressReceiver *prog)
{
ProvablePrimeContext *ppc = container_of(ctx, ProvablePrimeContext, pgc);
mp_int *p = provableprime_generate_inner(ppc, pcs, prog, 0.0, 1.0);
return p;
}
static inline strbuf *provableprime_mpu_certificate(
PrimeGenerationContext *ctx, mp_int *p)
{
ProvablePrimeContext *ppc = container_of(ctx, ProvablePrimeContext, pgc);
return pockle_mpu(ppc->pockle, p);
}
#define DECLARE_POLICY(name, policy) \
static const struct ProvablePrimePolicyExtra \
pppextra_##name = {policy}; \
const PrimeGenerationPolicy name = { \
provableprime_add_progress_phase, \
provableprime_new_context, \
provableprime_free_context, \
provableprime_generate, \
provableprime_mpu_certificate, \
&pppextra_##name, \
}
DECLARE_POLICY(primegen_provable_fast, SPP_FAST);
DECLARE_POLICY(primegen_provable_maurer_simple, SPP_MAURER_SIMPLE);
DECLARE_POLICY(primegen_provable_maurer_complex, SPP_MAURER_COMPLEX);
/* ----------------------------------------------------------------------
* Reusable null implementation of the progress-reporting API.
*/
static inline ProgressPhase null_progress_add(void) {
ProgressPhase ph = { .n = 0 };
return ph;
}
ProgressPhase null_progress_add_linear(
ProgressReceiver *prog, double c) { return null_progress_add(); }
ProgressPhase null_progress_add_probabilistic(
ProgressReceiver *prog, double c, double p) { return null_progress_add(); }
void null_progress_ready(ProgressReceiver *prog) {}
void null_progress_start_phase(ProgressReceiver *prog, ProgressPhase phase) {}
void null_progress_report(ProgressReceiver *prog, double progress) {}
void null_progress_report_attempt(ProgressReceiver *prog) {}
void null_progress_report_phase_complete(ProgressReceiver *prog) {}
const ProgressReceiverVtable null_progress_vt = {
.add_linear = null_progress_add_linear,
.add_probabilistic = null_progress_add_probabilistic,
.ready = null_progress_ready,
.start_phase = null_progress_start_phase,
.report = null_progress_report,
.report_attempt = null_progress_report_attempt,
.report_phase_complete = null_progress_report_phase_complete,
};
/* ----------------------------------------------------------------------
* Helper function for progress estimation.
*/
double estimate_modexp_cost(unsigned bits)
{
/*
* A modexp of n bits goes roughly like O(n^2.58), on the grounds
* that our modmul is O(n^1.58) (Karatsuba) and you need O(n) of
* them in a modexp.
*/
return pow(bits, 2.58);
}