This commit is contained in:
cutemeli
2025-12-22 10:35:30 +00:00
parent 0bfc6c8425
commit 5ce7ca2c5d
38927 changed files with 0 additions and 4594700 deletions

View File

@@ -1,135 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import codecs
import json
from operator import itemgetter
def usage():
    '''Return the command-line help text, with this script's path filled in.'''
    template = '''
usage:
%s data-dir src/Matchers/frequency_lists.json
generates frequency_lists.json (zxcvbn's ranked dictionary file) from word frequency data.
data-dir should contain frequency counts, as generated by the data-scripts/count_* scripts.
DICTIONARIES controls which frequency data will be included and at maximum how many tokens
per dictionary.
If a token appears in multiple frequency lists, it will only appear once in emitted .json file,
in the dictionary where it has lowest rank.
Short tokens, if rare, are also filtered out. If a token has higher rank than 10**(token.length),
it will be excluded because a bruteforce match would have given it a lower guess score.
A warning will be printed if DICTIONARIES contains a dictionary name that doesn't appear in
passed data dir, or vice-versa.
'''
    return template % sys.argv[0]
# maps dict name to num words. None value means "include all words"
# Keys must match the basenames of the count files found in the data dir
# (see parse_frequency_lists); values are the post-filter cutoffs applied
# in filter_frequency_lists.
DICTIONARIES = dict(
    us_tv_and_film = 30000,
    english_wikipedia = 30000,
    passwords = 30000,
    surnames = 10000,
    male_names = None,    # no cutoff: keep every name
    female_names = None,  # no cutoff: keep every name
)
# returns {list_name: {token: rank}}, as tokens and ranks occur in each file.
def parse_frequency_lists(data_dir):
    '''Read every frequency-count file in data_dir into ranked dictionaries.

    Each file named <list_name>.<ext> becomes an entry
    {list_name: {token: rank}}, where rank is the token's 1-based line
    number and the token is the first whitespace-separated field on the
    line. Files whose basename is not in DICTIONARIES are skipped with a
    warning; DICTIONARIES names with no matching file also get a warning.
    '''
    freq_lists = {}
    for filename in os.listdir(data_dir):
        freq_list_name, ext = os.path.splitext(filename)
        if freq_list_name not in DICTIONARIES:
            msg = 'Warning: %s appears in %s directory but not in DICTIONARY settings. Excluding.'
            print(msg % (freq_list_name, data_dir))
            continue
        token_to_rank = {}
        with codecs.open(os.path.join(data_dir, filename), 'r', 'utf8') as f:
            for i, line in enumerate(f):
                rank = i + 1  # rank starts at 1
                parts = line.split()
                if not parts:
                    # robustness fix: a blank line used to crash on [0].
                    # NOTE: its rank number is still consumed, as before.
                    continue
                token = parts[0]
                token_to_rank[token] = rank
        freq_lists[freq_list_name] = token_to_rank
    for freq_list_name in DICTIONARIES:
        if freq_list_name not in freq_lists:
            msg = 'Warning: %s appears in DICTIONARY settings but not in %s directory. Excluding.'
            # bug fix: original interpolated undefined name `freq_list`,
            # raising NameError whenever this warning path was reached.
            print(msg % (freq_list_name, data_dir))
    return freq_lists
def is_rare_and_short(token, rank):
    '''True when token is too infrequent for its length to be worth keeping.

    A token of length L is kept only while its rank stays below 10**L;
    past that, a brute-force match would guess it sooner anyway.
    '''
    threshold = 10 ** len(token)
    return rank >= threshold
def has_comma_or_double_quote(token, rank, lst_name):
    '''True when token contains a character that breaks comma-joined output.'''
    # hax, switch to csv or similar if this excludes too much.
    # simple comma joining has the advantage of being easy to process
    # client-side w/o needing a lib, and so far this only excludes a few
    # very high-rank tokens eg 'ps8,000' at rank 74868 from wikipedia list.
    return any(bad in token for bad in (',', '"'))
def filter_frequency_lists(freq_lists):
    '''
    filters frequency data according to:
    - filter out short tokens if they are too rare.
    - filter out tokens if they already appear in another dict
      at lower rank.
    - cut off final freq_list at limits set in DICTIONARIES, if any.
    '''
    # pass 1: for every token, find the single list where it ranks best.
    best_rank = {}  # token -> lowest rank seen across all freq lists
    best_list = {}  # token -> name of the freq list holding that rank
    for list_name, token_to_rank in freq_lists.items():
        for token, rank in token_to_rank.items():
            if token in best_rank:
                assert token in best_list
                assert best_list[token] != list_name, 'same token occurs multiple times in %s' % list_name
                if rank < best_rank[token]:
                    best_rank[token] = rank
                    best_list[token] = list_name
            else:
                assert token not in best_list
                best_rank[token] = rank
                best_list[token] = list_name
    # pass 2: keep each token only in its best list, dropping rare-short
    # tokens and tokens that would break the comma-joined output format.
    kept = {list_name: [] for list_name in freq_lists}          # name -> [(token, rank), ...]
    kept_counts = {list_name: 0 for list_name in freq_lists}    # name -> running token count
    for list_name, token_to_rank in freq_lists.items():
        for token, rank in token_to_rank.items():
            if best_list[token] != list_name:
                continue
            if is_rare_and_short(token, rank) or has_comma_or_double_quote(token, rank, list_name):
                continue
            kept[list_name].append((token, rank))
            kept_counts[list_name] += 1
    # pass 3: sort by rank, apply per-list cutoffs, and strip the ranks.
    result = {}
    for list_name, pairs in kept.items():
        pairs.sort(key=itemgetter(1))
        limit = DICTIONARIES[list_name]
        if limit and len(pairs) > limit:
            pairs = pairs[:limit]
        result[list_name] = [pair[0] for pair in pairs]  # discard rank post-sort
    return result
def to_kv(lst, lst_name):
    '''Render lst as a JS-style `name: "a,b,c".split(",")` key-value string.'''
    joined = ','.join(lst)
    return '%s: "%s".split(",")' % (lst_name, joined)
def main():
    '''Parse raw frequency counts and write the filtered JSON dictionary file.

    Expects exactly two CLI arguments: the data directory and the output
    path. Prints usage and exits (status 0) otherwise.
    '''
    if len(sys.argv) != 3:
        print(usage())
        sys.exit(0)
    data_dir, output_file = sys.argv[1:]
    raw_lists = parse_frequency_lists(data_dir)
    final_lists = filter_frequency_lists(raw_lists)
    with codecs.open(output_file, 'w', 'utf8') as out:
        json.dump(final_lists, out)

if __name__ == '__main__':
    main()

View File

@@ -1,105 +0,0 @@
#!/usr/bin/python
import sys
import json as simplejson
def usage():
    '''Return the command-line help text, with this script's path filled in.'''
    template = '''
constructs adjacency_graphs.json from QWERTY and DVORAK keyboard layouts
usage:
%s src/Matchers/adjacency_graphs.json
'''
    return template % sys.argv[0]
qwerty = r'''
`~ 1! 2@ 3# 4$ 5% 6^ 7& 8* 9( 0) -_ =+
qQ wW eE rR tT yY uU iI oO pP [{ ]} \|
aA sS dD fF gG hH jJ kK lL ;: '"
zZ xX cC vV bB nN mM ,< .> /?
'''
dvorak = r'''
`~ 1! 2@ 3# 4$ 5% 6^ 7& 8* 9( 0) [{ ]}
'" ,< .> pP yY fF gG cC rR lL /? =+ \|
aA oO eE uU iI dD hH tT nN sS -_
;: qQ jJ kK xX bB mM wW vV zZ
'''
keypad = r'''
/ * -
7 8 9 +
4 5 6
1 2 3
0 .
'''
mac_keypad = r'''
= / *
7 8 9 -
4 5 6 +
1 2 3
0 .
'''
def get_slanted_adjacent_coords(x, y):
    '''
    returns the six adjacent coordinates on a standard keyboard, where each row is slanted to the
    right from the last. adjacencies are clockwise, starting with key to the left, then two keys
    above, then right key, then two keys below. (that is, only near-diagonal keys are adjacent,
    so g's coordinate is adjacent to those of t,y,b,v, but not those of r,u,n,c.)
    '''
    left = (x - 1, y)
    above_left = (x, y - 1)
    above_right = (x + 1, y - 1)
    right = (x + 1, y)
    below_right = (x, y + 1)
    below_left = (x - 1, y + 1)
    return [left, above_left, above_right, right, below_right, below_left]
def get_aligned_adjacent_coords(x, y):
    '''
    returns the eight clockwise adjacent coordinates on a keypad, where each row is vert aligned.
    (doc fix: the original docstring said "nine", but the returned list has
    exactly eight neighbors — the center key itself is not included.)
    '''
    return [(x-1, y), (x-1, y-1), (x, y-1), (x+1, y-1), (x+1, y), (x+1, y+1), (x, y+1), (x-1, y+1)]
def build_graph(layout_str, slanted):
    '''
    builds an adjacency graph as a dictionary: {character: [adjacent_characters]}.
    adjacent characters occur in a clockwise order.
    for example:
    * on qwerty layout, 'g' maps to ['fF', 'tT', 'yY', 'hH', 'bB', 'vV']
    * on keypad layout, '7' maps to [None, None, None, '=', '8', '5', '4', None]
    '''
    tokens = layout_str.split()
    token_size = len(tokens[0])
    assert all(len(token) == token_size for token in tokens), 'token len mismatch:\n ' + layout_str
    # x position unit len is token len plus 1 for the following whitespace.
    x_unit = token_size + 1
    coords_for = get_slanted_adjacent_coords if slanted else get_aligned_adjacent_coords
    position_table = {}  # maps from tuple (x,y) -> characters at that position.
    for y, row in enumerate(layout_str.split('\n')):
        # the way the layouts are illustrated, each qwerty row is indented
        # one space in from the last
        slant = y - 1 if slanted else 0
        for token in row.split():
            x, remainder = divmod(row.index(token) - slant, x_unit)
            assert remainder == 0, 'unexpected x offset for %s in:\n%s' % (token, layout_str)
            position_table[(x, y)] = token
    graph = {}
    for (x, y), chars in position_table.items():
        for char in chars:
            # position in the list indicates direction
            # (for qwerty, 0 is left, 1 is top, 2 is top right, ...)
            # for edge chars like 1 or m, None stands in as a placeholder
            # so that each character in the graph has a same-length adjacency list.
            graph[char] = [position_table.get(coord) for coord in coords_for(x, y)]
    return graph
if __name__ == '__main__':
    # Expect exactly one CLI argument: the output path for the JSON graphs.
    if len(sys.argv) != 2:
        print(usage())
        sys.exit(0)
    with open(sys.argv[1], 'w') as out:
        graphs = {
            'qwerty': build_graph(qwerty, True),
            'dvorak': build_graph(dvorak, True),
            'keypad': build_graph(keypad, False),
            'mac_keypad': build_graph(mac_keypad, False),
        }
        simplejson.dump(graphs, out)
    sys.exit(0)