Reputation: 1729
I have a project whose lines of code I want to count. Is it possible to count all the lines of code in the file directory containing the project by using Python?
Upvotes: 34
Views: 53269
Reputation: 475
I just did a variant of @Bryce93 's response for a python + flask project(s)... ran some pivot tables on the outcome .csv file and the like (I manually marked files as 'active' downstream)... cheers
import os
import pandas as pd
def _count_declarations(lines, keyword_pattern):
    """Count the lines that open a ``def``/``class`` style statement.

    A line counts when, after optional indentation, it consists of the
    keyword, a name and then ``(``, ``:`` or ``[``.  This also recognises
    ``class Foo:``, return-annotated functions and signatures that continue
    on following lines.
    """
    import re  # function-scope import: ``re`` is not imported at file level

    opener = re.compile(r'\s*(?:' + keyword_pattern + r')\s+\w+\s*[(:\[]')
    return sum(1 for line in lines if opener.match(line))


def countlines(start, begin_start=None):
    """Recursively collect per-file statistics for ``.py`` and ``.html`` files.

    One dict per matching file is appended to the module-level ``files``
    list.  The module-level ``repo`` string is used to derive each file's
    folder: everything after the last occurrence of ``repo`` in the path,
    minus the file name.  Files directly inside ``start`` are recorded
    before any sub-directory is visited.

    Args:
        start: Directory to scan.
        begin_start: Not used by the function itself; the recursive call
            passes the parent directory here.

    Returns:
        None.  Results accumulate in the global ``files`` list.
    """
    global files
    entries = [os.path.join(start, name) for name in os.listdir(start)]

    for thing in entries:
        if not os.path.isfile(thing):
            continue
        if thing.endswith('.py'):
            language, comment_prefix = 'python', '#'
        elif thing.endswith('.html'):
            language, comment_prefix = 'jinja', '<!--'
        else:
            continue

        # Decode leniently so one oddly-encoded file cannot abort the scan;
        # replacement characters do not change the number of lines.
        with open(thing, 'r', encoding='utf-8', errors='replace') as f:
            lines = f.readlines()
        stripped = [line.strip() for line in lines]

        # 'lines' excludes '#' lines for every file type, as before.
        count = sum(1 for text in stripped if not text.startswith('#'))
        comments = sum(1 for text in stripped if text.startswith(comment_prefix))
        functions = classes = 0
        if language == 'python':
            functions = _count_declarations(lines, r'(?:async\s+)?def')
            classes = _count_declarations(lines, r'class')

        path = str(thing)
        # Normalise separators so the folder is derived the same way on
        # every platform.
        after_repo = path.replace(os.sep, '/').split(repo)[-1]
        folder = '/'.join(after_repo.split('/')[:-1])
        files.append({
            'path': path,
            'repo': repo,
            'language': language,
            'filetype': thing.split('.')[-1],
            'folder': folder,
            'filename': os.path.basename(thing),
            'lines': count,
            'functions': functions,
            'classes': classes,
            'comments': comments,
        })

    for thing in entries:
        if os.path.isdir(thing):
            countlines(thing, begin_start=start)
# countlines() appends into the global ``files`` list and reads the global
# ``repo`` name, so both are reset before each repository is scanned.
files = []
repo = '<repo1>'
countlines('<path>/<repo1>')
master = pd.DataFrame(files)

files = []
repo = '<repo2>'
countlines('<path>/<repo2>')
master = pd.concat([master, pd.DataFrame(files)], ignore_index=False, sort=False)

# Every file starts out as not active; the flag is edited by hand later.
master['active'] = False
# sort_values returns a new frame, so the result has to be assigned back;
# otherwise the CSV is written in scan order.
master = master.sort_values(by=['repo', 'folder', 'language', 'filename'])
master.to_csv('../<blah>.csv')
Upvotes: 1
Reputation:
Use radon
python3 -mpip install radon
radon raw -s pkg_dir/
** Total **
LOC: 2994
LLOC: 1768
SLOC: 1739
Comments: 71
Single comments: 29
Multi: 818
Blank: 408
- Comment Stats
(C % L): 2%
(C % S): 4%
(C + M % L): 30%
it will also calculate cyclomatic complexity
a@debian:~/build/clean/scte35-threefive$ radon cc -a threefive
threefive/base.py
M 61:4 SCTE35Base.kv_clean - A
M 85:4 SCTE35Base.load - A
M 95:4 SCTE35Base._chk_var - A
C 9:0 SCTE35Base - A
M 34:4 SCTE35Base.as_hms - A
M 79:4 SCTE35Base._chk_nbin - A
M 17:4 SCTE35Base.__repr__ - A
M 20:4 SCTE35Base.as_90k - A
M 27:4 SCTE35Base.as_ticks - A
M 48:4 SCTE35Base.get - A
M 54:4 SCTE35Base.get_json - A
threefive/bitn.py
C 9:0 BitBin - A
M 30:4 BitBin.as_int - A
M 47:4 BitBin.as_charset - A
C 99:0 NBin - A
M 133:4 NBin.add_int - A
M 170:4 NBin.reserve - A
.....
246 blocks (classes, functions, methods) analyzed.
Average complexity: A (1.9024390243902438)
Upvotes: 1
Reputation: 14255
Here's another one, using pathlib
. Lists individual (relative) file paths with line count, total number of files, and total line count.
import pathlib
class LoC(object):
    """Count lines in files below a path, printing one row per file.

    After ``count()`` returns, ``self.files`` holds the number of files
    counted and ``self.lines`` the total number of lines in them.
    """

    # File suffixes that are counted.
    suffixes = ['.py']
    # Names of files or directories to skip (placeholder entries; edit them).
    skip = ['name of dir or file to skip', ...]

    def count(self, path, init=True):
        """Count lines in ``path``, recursing into directories.

        Args:
            path: File or directory to count.
            init: True for the outermost call.  It resets the counters,
                records ``path`` as the root used for relative paths and
                prints the summary line; recursive calls pass False.
        """
        path = pathlib.Path(path)
        if init:
            # Initialise before the skip check: the skip message needs
            # self.root, so a skipped root must not hit a missing attribute.
            self.root = path
            self.files = 0
            self.lines = 0
        if path.name in self.skip:
            print(f'skipped: {path.relative_to(self.root)}')
            return
        if path.is_dir():
            # recursive case
            for item in path.iterdir():
                self.count(path=item, init=False)
        elif path.is_file() and path.suffix in self.suffixes:
            # base case: iterate the file instead of loading every line
            with path.open(mode='r') as f:
                line_count = sum(1 for _ in f)
            print(f'{path.relative_to(self.root)}: {line_count}')
            self.files += 1
            self.lines += line_count
        if init:
            print(f'\n{self.lines} lines in {self.files} files')
Note I omitted the __init__
method for clarity.
Usage example:
loc = LoC()
loc.count('/path/to/your/project/directory')
Upvotes: 3
Reputation: 126
This is derived from Daniel's answer (though refactored enough that this won't be obvious). That one doesn't recurse through subdirectories, which is the behavior I wanted.
from os import listdir
from os.path import isfile, isdir, join
def item_line_count(path):
    """Return the number of lines in ``path``.

    A directory is counted recursively via ``dir_line_count``, a regular
    file is read in binary mode and its lines counted, and anything else
    (for example a missing path) counts as 0.
    """
    if isdir(path):
        return dir_line_count(path)
    if isfile(path):
        # The context manager closes the handle; the original left it open.
        with open(path, 'rb') as handle:
            return sum(1 for _ in handle)
    return 0
def dir_line_count(dir):
    """Return the summed line count of every entry inside ``dir``."""
    total = 0
    for entry in listdir(dir):
        total += item_line_count(join(dir, entry))
    return total
Upvotes: 3
Reputation: 1729
from os import listdir
from os.path import isfile, join
def countLinesInPath(path,directory):
    """Return the number of lines in the file ``path`` inside ``directory``.

    The file is read as UTF-8 text.  It is opened in a ``with`` block so the
    handle is closed as soon as counting finishes; the original left it open.
    """
    with open(join(directory, path), encoding="utf8") as handle:
        return sum(1 for _ in handle)
def countLines(paths,directory):
    """Return the total line count of the files ``paths`` in ``directory``."""
    return sum(countLinesInPath(name, directory) for name in paths)
def getPaths(directory):
    """Return the names of the regular files directly inside ``directory``."""
    names = []
    for entry in listdir(directory):
        if isfile(join(directory, entry)):
            names.append(entry)
    return names
def countIn(directory):
    """Return the total line count of the files directly inside ``directory``."""
    paths = getPaths(directory)
    return countLines(paths, directory)
To count all the lines of code in the files in a directory, call the "countIn" function, passing the directory as a parameter.
Upvotes: 3
Reputation: 21
If you want to count how many lines are in your project, create a script inside of your project folder and paste the following into it:
import os
# Settings read by the rest of the script.
# Root directory handed to _walk(); replace the placeholder before running.
directory = "[project_directory]"
directory_depth = 100 # How deep you would like to go
extensions_to_consider = [".py", ".css"] # Change to ["all"] to include all extensions
# Entries whose path ends with one of these strings are skipped by _walk().
exclude_filenames = ["venv", ".idea", "__pycache__", "cache"]
# The list of files that failed to be counted is printed only when this is False.
skip_file_error_list = True
# Resolved path of this script; a scanned path equal to it is not counted.
this_file_dir = os.path.realpath(__file__)
print("Path to ignore:", this_file_dir)
print("=====================================")
def _walk(path, depth):
"""Recursively list files and directories up to a certain depth"""
depth -= 1
with os.scandir(path) as p:
for entry in p:
skip_entry = False
for fName in exclude_filenames:
if entry.path.endswith(fName):
skip_entry = True
break
if skip_entry:
print("Skipping entry", entry.path)
continue
yield entry.path
if entry.is_dir() and depth > 0:
yield from _walk(entry.path, depth)
# Collect every candidate path up front so progress can be shown as a percentage.
print("Caching entries")
files = list(_walk(directory, directory_depth))
print("=====================================")
print("Counting Lines")
file_err_list = []
line_count = 0
len_files = len(files)
# "all" anywhere in the extension list disables extension filtering.
count_everything = "all" in extensions_to_consider
for i, file_dir in enumerate(files):
    # this_file_dir is a resolved path, so resolve the candidate as well;
    # otherwise the script is only recognised when ``directory`` is itself
    # an absolute, symlink-free path.
    if os.path.realpath(file_dir) == this_file_dir:
        print("=[Rejected file directory", file_dir, "]=")
        continue
    if not os.path.isfile(file_dir):
        continue
    if not (count_everything or file_dir.endswith(tuple(extensions_to_consider))):
        continue
    try:
        # ``with`` closes the file even when reading fails part-way through.
        with open(file_dir, "r") as source_file:
            # Lines consisting solely of a newline are not counted.
            local_count = sum(1 for line in source_file if line != "\n")
    except (OSError, UnicodeDecodeError):
        # Unreadable or undecodable file: remember it and carry on.
        file_err_list.append(file_dir)
        continue
    print("({:.1f}%)".format(100*i/len_files), file_dir, "|", local_count)
    line_count += local_count
print("=====================================")
print("File Count Errors:", len(file_err_list))
if not skip_file_error_list:
    # Print each failing path once (the original printed the whole list
    # once per failing file).
    for failed_path in file_err_list:
        print(failed_path)
print("=====================================")
print("Total lines |", line_count)
There are probably faster and more efficient ways to do this, but this is a nice start.
directory
is the project directory you want to be counted
directory_depth
is how many directory levels deep within the project infrastructure the script will descend
i.e. a depth of 3 would mean it will only scan the following depth:
extensions_to_consider
is the file extensions to count code. If you only want to count .py files, you set extensions_to_consider = [".py"]
exclude_filenames
is an array of file names (and directories) you don't want to consider the script to count code for.
skip_file_error_list
is a boolean variable. If you wish to see a printout of the files that could not be counted, set it to False. Otherwise set it to True (the script prints the list only when this is False).
Run the script using the Python interpreter. To run it in a terminal:
python path_to_file.py
or
python3 path_to_file.py
Upvotes: 2
Reputation: 19776
Based on Bryce93's answer, with code_only
option to exclude comments, docstrings, and empty lines from line count:
import os
def countlines(rootdir, total_lines=0, header=True, begin_start=None,
               code_only=True):
    """Recursively count lines in the ``.py`` files below ``rootdir``.

    One table row is printed per file (lines added, running total, path
    relative to the directory the scan started from).  Files directly in
    ``rootdir`` are handled before its sub-directories.

    Args:
        rootdir: Directory to scan.
        total_lines: Running total carried in from earlier calls.
        header: Print the table header first (the outermost call only).
        begin_start: Directory the printed paths are relative to; defaults
            to ``rootdir`` and is passed down unchanged while recursing.
        code_only: When True, leave out blank lines, ``#`` comment lines
            and every line that belongs to a triple-double-quoted block.

    Returns:
        The running total after everything below ``rootdir`` was counted.
    """
    def _get_new_lines(source):
        """Count the lines of ``source`` that are neither blank, ``#``
        comments nor part of a triple-double-quoted block."""
        code = 0
        in_docstring = False
        for raw in source:
            # strip() also removes the trailing newline, so blank lines
            # really compare as empty (the original never excluded them).
            line = raw.strip()
            if in_docstring:
                if '"""' in line:
                    in_docstring = False
                continue
            if not line or line.startswith('#'):
                continue
            if '"""' in line:
                # An odd number of delimiters leaves a block open.  If it is
                # never closed, the rest of the file counts as docstring
                # instead of raising IndexError.
                if line.count('"""') % 2 == 1:
                    in_docstring = True
                continue
            code += 1
        return code

    # Keep the original starting directory through every recursion level so
    # deeply nested files are shown relative to it, not to their parent.
    base = rootdir if begin_start is None else begin_start

    if header:
        print('{:>10} |{:>10} | {:<20}'.format('ADDED', 'TOTAL', 'FILE'))
        print('{:->11}|{:->11}|{:->20}'.format('', '', ''))

    entries = [os.path.join(rootdir, name) for name in os.listdir(rootdir)]

    for file in entries:
        if os.path.isfile(file) and file.endswith('.py'):
            with open(file, 'r') as f:
                source = f.readlines()
            new_lines = _get_new_lines(source) if code_only else len(source)
            total_lines += new_lines
            reldir_of_file = os.path.join(os.curdir, os.path.relpath(file, base))
            print('{:>10} |{:>10} | {:<20}'.format(
                new_lines, total_lines, reldir_of_file))

    for file in entries:
        if os.path.isdir(file):
            total_lines = countlines(file, total_lines, header=False,
                                     begin_start=base, code_only=code_only)

    return total_lines
Upvotes: 1
Reputation:
As an addition to the pygount
answer, they just added the option --format=summary
to get the total number of lines in different file types in a directory.
pygount --format=summary ./your-directory
could output something like
Language Code % Comment %
------------- ---- ------ ------- ------
XML 1668 48.56 10 0.99
Python 746 21.72 150 14.90
TeX 725 21.11 57 5.66
HTML 191 5.56 0 0.00
markdown 58 1.69 0 0.00
JSON 37 1.08 0 0.00
INI 10 0.29 0 0.00
Text 0 0.00 790 78.45
__duplicate__ 0 0.00 0 0.00
------------- ---- ------ ------- ------
Sum total 3435 1007
Upvotes: 38
Reputation: 381
pygount
will display all the files in the folder, each with a count of code lines (excluding documentation)
https://pypi.org/project/pygount/
pip install pygount
To list the results for the current directory run:
pygount ~/path_to_directory
Upvotes: 28
Reputation: 509
This has a slight air of homework assignment :-) -- nonetheless, it's a worthwhile exercise, and Bryce93's formatting is nice. I think many would be unlikely to use Python for this given that it can be done quickly with a couple of shell commands, for example:
cat $(find . -name "*.py") | grep -E -v '^\s*$|^\s*#' | wc -l
Note that none of these solutions accounts for multiline ('''
) comments.
Upvotes: 13
Reputation: 481
Here's a function I wrote to count all lines of code in a python package and print an informative output. It will count all lines in all .py files in the starting directory and its subdirectories:
import os
def countlines(start, lines=0, header=True, begin_start=None):
    """Recursively count the lines of every ``.py`` file below ``start``.

    One table row is printed per file (lines added, running total, path
    relative to the directory the scan started from).  Files directly in
    ``start`` are handled before its sub-directories.

    Args:
        start: Directory to scan.
        lines: Running total carried in from earlier calls.
        header: Print the table header first (the outermost call only).
        begin_start: Directory the printed paths are relative to; defaults
            to ``start`` and is passed down unchanged while recursing.

    Returns:
        The running total after everything below ``start`` was counted.
    """
    # Keep the original starting directory through every recursion level so
    # deeply nested files are shown relative to it, not to their parent.
    base = start if begin_start is None else begin_start

    if header:
        print('{:>10} |{:>10} | {:<20}'.format('ADDED', 'TOTAL', 'FILE'))
        print('{:->11}|{:->11}|{:->20}'.format('', '', ''))

    entries = [os.path.join(start, name) for name in os.listdir(start)]

    for thing in entries:
        if os.path.isfile(thing) and thing.endswith('.py'):
            with open(thing, 'r') as f:
                newlines = sum(1 for _ in f)
            lines += newlines
            reldir_of_thing = os.path.join(os.curdir, os.path.relpath(thing, base))
            print('{:>10} |{:>10} | {:<20}'.format(
                newlines, lines, reldir_of_thing))

    for thing in entries:
        if os.path.isdir(thing):
            lines = countlines(thing, lines, header=False, begin_start=base)

    return lines
To use it, just pass the directory you'd like to start in. For example, to count the lines of code in some package foo
:
countlines(r'...\foo')
Which would output something like:
ADDED | TOTAL | FILE
-----------|-----------|--------------------
5 | 5 | .\__init__.py
539 | 578 | .\bar.py
558 | 1136 | .\baz\qux.py
Upvotes: 38