zxcvbn
Version:
realistic password strength estimation
370 lines (332 loc) • 14.6 kB
text/coffeescript
adjacency_graphs = require('./adjacency_graphs')
# on qwerty, 'g' has degree 6, being adjacent to 'ftyhbv'. '\' has degree 1.
# this calculates the average over all keys.
calc_average_degree = (graph) ->
average = 0
for key, neighbors of graph
average += (n for n in neighbors when n).length
average /= (k for k,v of graph).length
average
BRUTEFORCE_CARDINALITY = 10
MIN_GUESSES_BEFORE_GROWING_SEQUENCE = 10000
MIN_SUBMATCH_GUESSES_SINGLE_CHAR = 10
MIN_SUBMATCH_GUESSES_MULTI_CHAR = 50
scoring =
nCk: (n, k) ->
# http://blog.plover.com/math/choose.html
return 0 if k > n
return 1 if k == 0
r = 1
for d in [1..k]
r *= n
r /= d
n -= 1
r
log10: (n) -> Math.log(n) / Math.log(10) # IE doesn't support Math.log10 :(
log2: (n) -> Math.log(n) / Math.log(2)
factorial: (n) ->
# unoptimized, called only on small n
return 1 if n < 2
f = 1
f *= i for i in [2..n]
f
# ------------------------------------------------------------------------------
# search --- most guessable match sequence -------------------------------------
# ------------------------------------------------------------------------------
#
# takes a sequence of overlapping matches, returns the non-overlapping sequence with
# minimum guesses. the following is a O(l_max * (n + m)) dynamic programming algorithm
# for a length-n password with m candidate matches. l_max is the maximum optimal
# sequence length spanning each prefix of the password. In practice it rarely exceeds 5 and the
# search terminates rapidly.
#
# the optimal "minimum guesses" sequence is here defined to be the sequence that
# minimizes the following function:
#
# g = l! * Product(m.guesses for m in sequence) + D^(l - 1)
#
# where l is the length of the sequence.
#
# the factorial term is the number of ways to order l patterns.
#
# the D^(l-1) term is another length penalty, roughly capturing the idea that an
# attacker will try lower-length sequences first before trying length-l sequences.
#
# for example, consider a sequence that is date-repeat-dictionary.
# - an attacker would need to try other date-repeat-dictionary combinations,
# hence the product term.
# - an attacker would need to try repeat-date-dictionary, dictionary-repeat-date,
# ..., hence the factorial term.
# - an attacker would also likely try length-1 (dictionary) and length-2 (dictionary-date)
# sequences before length-3. assuming at minimum D guesses per pattern type,
# D^(l-1) approximates Sum(D^i for i in [1..l-1]
#
# ------------------------------------------------------------------------------
most_guessable_match_sequence: (password, matches, _exclude_additive=false) ->
n = password.length
# partition matches into sublists according to ending index j
matches_by_j = ([] for _ in [0...n])
for m in matches
matches_by_j[m.j].push m
# small detail: for deterministic output, sort each sublist by i.
for lst in matches_by_j
lst.sort (m1, m2) -> m1.i - m2.i
optimal =
# optimal.m[k][l] holds final match in the best length-l match sequence covering the
# password prefix up to k, inclusive.
# if there is no length-l sequence that scores better (fewer guesses) than
# a shorter match sequence spanning the same prefix, optimal.m[k][l] is undefined.
m: ({} for _ in [0...n])
# same structure as optimal.m -- holds the product term Prod(m.guesses for m in sequence).
# optimal.pi allows for fast (non-looping) updates to the minimization function.
pi: ({} for _ in [0...n])
# same structure as optimal.m -- holds the overall metric.
g: ({} for _ in [0...n])
# helper: considers whether a length-l sequence ending at match m is better (fewer guesses)
# than previously encountered sequences, updating state if so.
update = (m, l) =>
k = m.j
pi = @estimate_guesses m, password
if l > 1
# we're considering a length-l sequence ending with match m:
# obtain the product term in the minimization function by multiplying m's guesses
# by the product of the length-(l-1) sequence ending just before m, at m.i - 1.
pi *= optimal.pi[m.i - 1][l - 1]
# calculate the minimization func
g = @factorial(l) * pi
unless _exclude_additive
g += Math.pow(MIN_GUESSES_BEFORE_GROWING_SEQUENCE, l - 1)
# update state if new best.
# first see if any competing sequences covering this prefix, with l or fewer matches,
# fare better than this sequence. if so, skip it and return.
for competing_l, competing_g of optimal.g[k]
continue if competing_l > l
return if competing_g <= g
# this sequence might be part of the final optimal sequence.
optimal.g[k][l] = g
optimal.m[k][l] = m
optimal.pi[k][l] = pi
# helper: evaluate bruteforce matches ending at k.
bruteforce_update = (k) =>
# see if a single bruteforce match spanning the k-prefix is optimal.
m = make_bruteforce_match(0, k)
update(m, 1)
for i in [1..k]
# generate k bruteforce matches, spanning from (i=1, j=k) up to (i=k, j=k).
# see if adding these new matches to any of the sequences in optimal[i-1]
# leads to new bests.
m = make_bruteforce_match(i, k)
for l, last_m of optimal.m[i-1]
l = parseInt(l)
# corner: an optimal sequence will never have two adjacent bruteforce matches.
# it is strictly better to have a single bruteforce match spanning the same region:
# same contribution to the guess product with a lower length.
# --> safe to skip those cases.
continue if last_m.pattern == 'bruteforce'
# try adding m to this length-l sequence.
update(m, l + 1)
# helper: make bruteforce match objects spanning i to j, inclusive.
make_bruteforce_match = (i, j) =>
pattern: 'bruteforce'
token: password[i..j]
i: i
j: j
# helper: step backwards through optimal.m starting at the end,
# constructing the final optimal match sequence.
unwind = (n) =>
optimal_match_sequence = []
k = n - 1
# find the final best sequence length and score
l = undefined
g = Infinity
for candidate_l, candidate_g of optimal.g[k]
if candidate_g < g
l = candidate_l
g = candidate_g
while k >= 0
m = optimal.m[k][l]
optimal_match_sequence.unshift m
k = m.i - 1
l--
optimal_match_sequence
for k in [0...n]
for m in matches_by_j[k]
if m.i > 0
for l of optimal.m[m.i - 1]
l = parseInt(l)
update(m, l + 1)
else
update(m, 1)
bruteforce_update(k)
optimal_match_sequence = unwind(n)
optimal_l = optimal_match_sequence.length
# corner: empty password
if password.length == 0
guesses = 1
else
guesses = optimal.g[n - 1][optimal_l]
# final result object
password: password
guesses: guesses
guesses_log10: @log10 guesses
sequence: optimal_match_sequence
# ------------------------------------------------------------------------------
# guess estimation -- one function per match pattern ---------------------------
# ------------------------------------------------------------------------------
estimate_guesses: (match, password) ->
return match.guesses if match.guesses? # a match's guess estimate doesn't change. cache it.
min_guesses = 1
if match.token.length < password.length
min_guesses = if match.token.length == 1
MIN_SUBMATCH_GUESSES_SINGLE_CHAR
else
MIN_SUBMATCH_GUESSES_MULTI_CHAR
estimation_functions =
bruteforce: @bruteforce_guesses
dictionary: @dictionary_guesses
spatial: @spatial_guesses
repeat: @repeat_guesses
sequence: @sequence_guesses
regex: @regex_guesses
date: @date_guesses
guesses = estimation_functions[match.pattern].call this, match
match.guesses = Math.max guesses, min_guesses
match.guesses_log10 = @log10 match.guesses
match.guesses
bruteforce_guesses: (match) ->
guesses = Math.pow BRUTEFORCE_CARDINALITY, match.token.length
if guesses == Number.POSITIVE_INFINITY
guesses = Number.MAX_VALUE;
# small detail: make bruteforce matches at minimum one guess bigger than smallest allowed
# submatch guesses, such that non-bruteforce submatches over the same [i..j] take precedence.
min_guesses = if match.token.length == 1
MIN_SUBMATCH_GUESSES_SINGLE_CHAR + 1
else
MIN_SUBMATCH_GUESSES_MULTI_CHAR + 1
Math.max guesses, min_guesses
repeat_guesses: (match) ->
match.base_guesses * match.repeat_count
sequence_guesses: (match) ->
first_chr = match.token.charAt(0)
# lower guesses for obvious starting points
if first_chr in ['a', 'A', 'z', 'Z', '0', '1', '9']
base_guesses = 4
else
if first_chr.match /\d/
base_guesses = 10 # digits
else
# could give a higher base for uppercase,
# assigning 26 to both upper and lower sequences is more conservative.
base_guesses = 26
if not match.ascending
# need to try a descending sequence in addition to every ascending sequence ->
# 2x guesses
base_guesses *= 2
base_guesses * match.token.length
MIN_YEAR_SPACE: 20
REFERENCE_YEAR: new Date().getFullYear()
regex_guesses: (match) ->
char_class_bases =
alpha_lower: 26
alpha_upper: 26
alpha: 52
alphanumeric: 62
digits: 10
symbols: 33
if match.regex_name of char_class_bases
Math.pow(char_class_bases[match.regex_name], match.token.length)
else switch match.regex_name
when 'recent_year'
# conservative estimate of year space: num years from REFERENCE_YEAR.
# if year is close to REFERENCE_YEAR, estimate a year space of MIN_YEAR_SPACE.
year_space = Math.abs parseInt(match.regex_match[0]) - @REFERENCE_YEAR
year_space = Math.max year_space, @MIN_YEAR_SPACE
year_space
date_guesses: (match) ->
# base guesses: (year distance from REFERENCE_YEAR) * num_days * num_years
year_space = Math.max(Math.abs(match.year - @REFERENCE_YEAR), @MIN_YEAR_SPACE)
guesses = year_space * 365
# add factor of 4 for separator selection (one of ~4 choices)
guesses *= 4 if match.separator
guesses
KEYBOARD_AVERAGE_DEGREE: calc_average_degree(adjacency_graphs.qwerty)
# slightly different for keypad/mac keypad, but close enough
KEYPAD_AVERAGE_DEGREE: calc_average_degree(adjacency_graphs.keypad)
KEYBOARD_STARTING_POSITIONS: (k for k,v of adjacency_graphs.qwerty).length
KEYPAD_STARTING_POSITIONS: (k for k,v of adjacency_graphs.keypad).length
spatial_guesses: (match) ->
if match.graph in ['qwerty', 'dvorak']
s = @KEYBOARD_STARTING_POSITIONS
d = @KEYBOARD_AVERAGE_DEGREE
else
s = @KEYPAD_STARTING_POSITIONS
d = @KEYPAD_AVERAGE_DEGREE
guesses = 0
L = match.token.length
t = match.turns
# estimate the number of possible patterns w/ length L or less with t turns or less.
for i in [2..L]
possible_turns = Math.min(t, i - 1)
for j in [1..possible_turns]
guesses += @nCk(i - 1, j - 1) * s * Math.pow(d, j)
# add extra guesses for shifted keys. (% instead of 5, A instead of a.)
# math is similar to extra guesses of l33t substitutions in dictionary matches.
if match.shifted_count
S = match.shifted_count
U = match.token.length - match.shifted_count # unshifted count
if S == 0 or U == 0
guesses *= 2
else
shifted_variations = 0
shifted_variations += @nCk(S + U, i) for i in [1..Math.min(S, U)]
guesses *= shifted_variations
guesses
dictionary_guesses: (match) ->
match.base_guesses = match.rank # keep these as properties for display purposes
match.uppercase_variations = @uppercase_variations match
match.l33t_variations = @l33t_variations match
reversed_variations = match.reversed and 2 or 1
match.base_guesses * match.uppercase_variations * match.l33t_variations * reversed_variations
START_UPPER: /^[A-Z][^A-Z]+$/
END_UPPER: /^[^A-Z]+[A-Z]$/
ALL_UPPER: /^[^a-z]+$/
ALL_LOWER: /^[^A-Z]+$/
uppercase_variations: (match) ->
word = match.token
return 1 if word.match(@ALL_LOWER) or word.toLowerCase() == word
# a capitalized word is the most common capitalization scheme,
# so it only doubles the search space (uncapitalized + capitalized).
# allcaps and end-capitalized are common enough too, underestimate as 2x factor to be safe.
for regex in [@START_UPPER, @END_UPPER, @ALL_UPPER]
return 2 if word.match regex
# otherwise calculate the number of ways to capitalize U+L uppercase+lowercase letters
# with U uppercase letters or less. or, if there's more uppercase than lower (for eg. PASSwORD),
# the number of ways to lowercase U+L letters with L lowercase letters or less.
U = (chr for chr in word.split('') when chr.match /[A-Z]/).length
L = (chr for chr in word.split('') when chr.match /[a-z]/).length
variations = 0
variations += @nCk(U + L, i) for i in [1..Math.min(U, L)]
variations
l33t_variations: (match) ->
return 1 if not match.l33t
variations = 1
for subbed, unsubbed of match.sub
# lower-case match.token before calculating: capitalization shouldn't affect l33t calc.
chrs = match.token.toLowerCase().split('')
S = (chr for chr in chrs when chr == subbed).length # num of subbed chars
U = (chr for chr in chrs when chr == unsubbed).length # num of unsubbed chars
if S == 0 or U == 0
# for this sub, password is either fully subbed (444) or fully unsubbed (aaa)
# treat that as doubling the space (attacker needs to try fully subbed chars in addition to
# unsubbed.)
variations *= 2
else
# this case is similar to capitalization:
# with aa44a, U = 3, S = 2, attacker needs to try unsubbed + one sub + two subs
p = Math.min(U, S)
possibilities = 0
possibilities += @nCk(U + S, i) for i in [1..p]
variations *= possibilities
variations
# utilities --------------------------------------------------------------------
module.exports = scoring