#!/usr/bin/python

"""
Best-practices tracker for Tor source code.

Go through the various .c files and collect metrics about them. If the metrics
violate some of our best practices and they are not found in the optional
exceptions file, then log a problem about them.

We currently do metrics about file size, function size and number of includes,
for C source files and headers.

practracker.py should be run with the Tor top-level source directory as its
argument, like this:
  $ python3 ./scripts/maint/practracker/practracker.py .

To regenerate the exceptions file so that it allows all current
problems in the Tor source, use the --regen flag:
  $ python3 ./scripts/maint/practracker/practracker.py --regen .
"""

from __future__ import print_function

import os, sys

import metrics
import util
import problem
import includes

# The filename of the exceptions file (it should be placed in the practracker directory)
EXCEPTIONS_FNAME = "./exceptions.txt"

# Recommended file size
MAX_FILE_SIZE = 3000 # lines
# Recommended function size
MAX_FUNCTION_SIZE = 100 # lines
# Recommended number of #includes
MAX_INCLUDE_COUNT = 50
# Recommended file size for headers
MAX_H_FILE_SIZE = 500
# Recommended include count for headers
MAX_H_INCLUDE_COUNT = 15
# Recommended number of dependency violations
MAX_DEP_VIOLATIONS = 0

# Map from problem type to functions that adjust for tolerance
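# (Each function maps an exception's limit to a slightly larger value that is
# still tolerated; they are applied in step 2.1 of main() so that small
# overruns produce warnings rather than errors.)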
TOLERANCE_FNS = {
    'include-count': lambda n: int(n*1.1),
    'function-size': lambda n: int(n*1.1),
    'file-size': lambda n: int(n*1.02),
    'dependency-violation': lambda n: (n+2)
}

#######################################################

# The Tor source code topdir
TOR_TOPDIR = None

#######################################################

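# Open source files as UTF-8 under Python 3.  (Python 2's built-in open() has
# no 'encoding' argument, so we use the platform default there.)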
if sys.version_info[0] <= 2:
    def open_file(fname):
        return open(fname, 'r')
else:
    def open_file(fname):
        return open(fname, 'r', encoding='utf-8')

def consider_file_size(fname, f):
    """Consider the size of 'f' and yield an FileSizeItem for it.
    """
    file_size = metrics.get_file_len(f)
    yield problem.FileSizeItem(fname, file_size)

def consider_includes(fname, f):
    """Consider the #include count in for 'f' and yield an IncludeCountItem
        for it.
    """
    include_count = metrics.get_include_count(f)

    yield problem.IncludeCountItem(fname, include_count)

def consider_function_size(fname, f):
    """yield a FunctionSizeItem for every function in f.
    """

    for name, lines in metrics.get_function_lines(f):
        canonical_function_name = "%s:%s()" % (fname, name)
        yield problem.FunctionSizeItem(canonical_function_name, lines)

def consider_include_violations(fname, real_fname, f):
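    """Check 'f' (found at the real path 'real_fname') against the
       .may_include rules, and yield a DependencyViolationItem counting the
       disallowed includes, if there are any.
    """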
    n = 0
    for item in includes.consider_include_rules(real_fname, f):
        n += 1
    if n:
        yield problem.DependencyViolationItem(fname, n)


#######################################################

def consider_all_metrics(files_list):
    """Consider metrics for all files, and yield a sequence of problem.Item
       objects for those issues."""
    for fname in files_list:
        with open_file(fname) as f:
            for item in consider_metrics_for_file(fname, f):
                yield item

def consider_metrics_for_file(fname, f):
    """
       Yield a sequence of problem.Item objects for all of the metrics in
       'f'.
    """
    real_fname = fname
    # Strip the useless part of the path
    if fname.startswith(TOR_TOPDIR):
        fname = fname[len(TOR_TOPDIR):]

    # Get file length
    for item in consider_file_size(fname, f):
        yield item

    # Consider number of #includes
    f.seek(0)
    for item in consider_includes(fname, f):
        yield item

    # Get function length
    f.seek(0)
    for item in consider_function_size(fname, f):
        yield item

    # Check for "upward" includes
    f.seek(0)
    for item in consider_include_violations(fname, real_fname, f):
        yield item

HEADER="""\
# Welcome to the exceptions file for Tor's best-practices tracker!
#
# Each line of this file represents a single violation of Tor's best
# practices -- typically, a violation that we had before practracker.py
# first existed.
#
# There are four kinds of problems that we recognize right now:
#   function-size -- a function of more than {MAX_FUNCTION_SIZE} lines.
#   file-size -- a .c file of more than {MAX_FILE_SIZE} lines, or a .h
#      file with more than {MAX_H_FILE_SIZE} lines.
#   include-count -- a .c file with more than {MAX_INCLUDE_COUNT} #includes,
#      or a .h file with more than {MAX_H_INCLUDE_COUNT} #includes.
#   dependency-violation -- a file includes a header that it should
#      not, according to an advisory .may_include file.
#
# Each line below represents a single exception that practracker should
# _ignore_. Each line has four parts:
#  1. The word "problem".
#  2. The kind of problem.
#  3. The location of the problem: either a filename, or a
#     filename:functionname pair.
#  4. The magnitude of the problem to ignore.
#
# So for example, consider this line:
#    problem file-size /src/core/or/connection_or.c 3200
#
# It tells practracker to allow the mentioned file to be up to 3200 lines
# long, even though ordinarily it would warn about any file with more than
# {MAX_FILE_SIZE} lines.
#
# You can either edit this file by hand, or regenerate it completely by
# running `make practracker-regen`.
#
# Remember: It is better to fix the problem than to add a new exception!

""".format(**globals())

def main(argv):
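    """Parse the command line, collect metrics for the requested source
       files, and report any problems that are not covered by the
       exceptions file.
    """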
    import argparse

    progname = argv[0]
    parser = argparse.ArgumentParser(prog=progname)
    parser.add_argument("--regen", action="store_true",
                        help="Regenerate the exceptions file")
    parser.add_argument("--list-overbroad", action="store_true",
                        help="List over-strict exceptions")
    parser.add_argument("--exceptions",
                        help="Override the location for the exceptions file")
    parser.add_argument("--strict", action="store_true",
                        help="Make all warnings into errors")
    parser.add_argument("--terse", action="store_true",
                        help="Do not emit helpful instructions.")
    parser.add_argument("--max-h-file-size", default=MAX_H_FILE_SIZE,
                        help="Maximum lines per .h file")
    parser.add_argument("--max-h-include-count", default=MAX_H_INCLUDE_COUNT,
                        help="Maximum includes per .h file")
    parser.add_argument("--max-file-size", default=MAX_FILE_SIZE,
                        help="Maximum lines per .c file")
    parser.add_argument("--max-include-count", default=MAX_INCLUDE_COUNT,
                        help="Maximum includes per .c file")
    parser.add_argument("--max-function-size", default=MAX_FUNCTION_SIZE,
                        help="Maximum lines per function")
    parser.add_argument("--max-dependency-violations", default=MAX_DEP_VIOLATIONS,
                        help="Maximum number of dependency violations to allow")
    parser.add_argument("--include-dir", action="append",
                        default=["src"],
                        help="A directory (under topdir) to search for source")
    parser.add_argument("topdir", default=".", nargs="?",
                        help="Top-level directory for the tor source")
    args = parser.parse_args(argv[1:])

    global TOR_TOPDIR
    TOR_TOPDIR = args.topdir
    if args.exceptions:
        exceptions_file = args.exceptions
    else:
        exceptions_file = os.path.join(TOR_TOPDIR, "scripts/maint/practracker", EXCEPTIONS_FNAME)

    # 0) Configure our thresholds of "what is a problem actually"
    filt = problem.ProblemFilter()
    filt.addThreshold(problem.FileSizeItem("*.c", int(args.max_file_size)))
    filt.addThreshold(problem.IncludeCountItem("*.c", int(args.max_include_count)))
    filt.addThreshold(problem.FileSizeItem("*.h", int(args.max_h_file_size)))
    filt.addThreshold(problem.IncludeCountItem("*.h", int(args.max_h_include_count)))
    filt.addThreshold(problem.FunctionSizeItem("*.c", int(args.max_function_size)))
    filt.addThreshold(problem.DependencyViolationItem("*.c", int(args.max_dependency_violations)))
    filt.addThreshold(problem.DependencyViolationItem("*.h", int(args.max_dependency_violations)))

    # 1) Get all the .c files we care about
    files_list = util.get_tor_c_files(TOR_TOPDIR, args.include_dir)

    # 2) Initialize problem vault and load an optional exceptions file so that
    # we don't warn about the past
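    # When regenerating, write the new exceptions to a temporary file and
    # start from an empty vault, so that every problem found below is
    # recorded; the temporary file replaces the old exceptions file at the end.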
    if args.regen:
        tmpname = exceptions_file + ".tmp"
        tmpfile = open(tmpname, "w")
        problem_file = tmpfile
        problem_file.write(HEADER)
        ProblemVault = problem.ProblemVault()
    else:
        ProblemVault = problem.ProblemVault(exceptions_file)
        problem_file = sys.stdout

    # 2.1) Adjust the exceptions so that we warn only about small problems,
    # and produce errors on big ones.
    if not (args.regen or args.list_overbroad or args.strict):
        ProblemVault.set_tolerances(TOLERANCE_FNS)

    # 3) Go through all the files and report problems if they are not exceptions
    found_new_issues = 0
    for item in filt.filter(consider_all_metrics(files_list)):
        status = ProblemVault.register_problem(item)
        if status == problem.STATUS_ERR:
            print(item, file=problem_file)
            found_new_issues += 1
        elif status == problem.STATUS_WARN:
            # warnings always go to stdout.
            print("(warning) {}".format(item))

    if args.regen:
        tmpfile.close()
        os.rename(tmpname, exceptions_file)
        sys.exit(0)

    # If new issues were found, give the developer some advice on how to resolve them.
    if found_new_issues and not args.regen and not args.terse:
        new_issues_str = """\
FAILURE: practracker found {} new problem(s) in the code: see warnings above.

Please fix the problems if you can, and update the exceptions file
({}) if you can't.

See doc/HACKING/HelpfulTools.md for more information on using practracker.\

You can disable this message by setting the TOR_DISABLE_PRACTRACKER environment
variable.
""".format(found_new_issues, exceptions_file)
        print(new_issues_str)

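    # 4) If requested, list the exceptions that allow more than the code
    #    currently needs, along with each one's current metric value
    #    (or 0 if the problem has disappeared entirely).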
    if args.list_overbroad:
        def k_fn(tup):
            return tup[0].key()
        for (ex,p) in sorted(ProblemVault.list_overbroad_exceptions(), key=k_fn):
            if p is None:
                print(ex, "->", 0)
            else:
                print(ex, "->", p.metric_value)

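    # Use the number of new issues as the exit status, so that any new
    # problem produces a nonzero (failing) exit.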
    sys.exit(found_new_issues)

if __name__ == '__main__':
    if os.environ.get("TOR_DISABLE_PRACTRACKER"):
        sys.exit(0)
    main(sys.argv)