Reputation: 1729
I have a project whose lines of code I want to count. Is it possible to count all the lines of code in the file directory containing the project by using Python?
Upvotes: 34
Views: 53269
Reputation: 475
I just did a variant of @Bryce93 's response for a python + flask project(s)... ran some pivot tables on the outcome .csv file and the like (I manually marked files as 'active' downstream)... cheers
import os
import pandas as pd
def countlines(start, begin_start=None):
    """Recursively collect per-file statistics for ``.py`` and ``.html`` files.

    For every matching file found under ``start`` one dict is appended to the
    module-level ``files`` list. The module-level ``repo`` string is stored in
    each record and is also used to derive the ``folder`` value (the part of
    the path that follows ``repo``, without the file name).

    Args:
        start: Directory to scan.
        begin_start: Parent directory, passed along by the recursion. It is
            accepted for interface compatibility and not otherwise used.

    Returns:
        None. Results are accumulated in the global ``files`` list.
    """
    global files
    for thing in os.listdir(start):
        thing = os.path.join(start, thing)
        if not os.path.isfile(thing):
            continue
        if not thing.endswith(('.py', '.html')):
            continue

        with open(thing, 'r') as f:
            lines = f.readlines()
        stripped = [line.strip() for line in lines]

        # "lines" counts everything that is not a '#' comment line
        # (blank lines are included, for both file types).
        count = sum(1 for s in stripped if not s.startswith('#'))
        functions, classes, comments = 0, 0, 0
        if thing.endswith('.py'):
            # Only single-line "def ...):" / "class ...):" headers are detected.
            functions = sum(
                1 for s in stripped
                if s.startswith('def ') and s.endswith('):')
            )
            classes = sum(
                1 for s in stripped
                if s.startswith('class ') and s.endswith('):')
            )
            comments = sum(1 for s in stripped if s.startswith('#'))
            language = 'python'
        elif thing.endswith('.html'):
            comments = sum(1 for s in stripped if s.startswith('<!--'))
            language = 'jinja'
        else:
            # Defensive: unreachable while the extension filter above matches
            # the branches handled here.
            raise Exception(thing)

        path = str(thing)
        folder = '/'.join(path.split(repo)[-1].split('/')[:-1])
        files.append({
            'path': path,
            'repo': repo,
            'language': language,
            'filetype': thing.split('.')[-1],
            'folder': folder,
            'filename': thing.split('/')[-1],
            'lines': count,
            'functions': functions,
            'classes': classes,
            'comments': comments,
        })

    # Descend into sub-directories after the files of this level are recorded.
    for thing in os.listdir(start):
        thing = os.path.join(start, thing)
        if os.path.isdir(thing):
            countlines(thing, begin_start=start)
# Scan the first repository; countlines() appends one record per file to the
# module-level `files` list and reads the module-level `repo`.
files = []
repo = '<repo1>'
# NOTE(review): the countlines(...) call was missing from this snippet. It is
# restored on the assumption that `repo` holds the repository path — confirm.
countlines(repo)
master = pd.DataFrame(files)

# Scan the second repository into a fresh list and append it to the table.
files = []
repo = '<repo2>'
countlines(repo)  # NOTE(review): restored call, same assumption as above.
master = pd.concat([master, pd.DataFrame(files)], ignore_index=False, sort=False)

# Column to be filled in manually downstream.
master['active'] = False
# sort_values returns a new frame, so the result must be assigned to take effect.
master = master.sort_values(by=['repo', 'folder', 'language', 'filename'])
Upvotes: 1
Use radon
python3 -mpip install radon
radon raw -s pkg_dir/
** Total **
LOC: 2994
LLOC: 1768
SLOC: 1739
Comments: 71
Single comments: 29
Multi: 818
Blank: 408
- Comment Stats
(C % L): 2%
(C % S): 4%
(C + M % L): 30%
it will also calculate cyclomatic complexity
a@debian:~/build/clean/scte35-threefive$ radon cc -a threefive
M 61:4 SCTE35Base.kv_clean - A
M 85:4 SCTE35Base.load - A
M 95:4 SCTE35Base._chk_var - A
C 9:0 SCTE35Base - A
M 34:4 SCTE35Base.as_hms - A
M 79:4 SCTE35Base._chk_nbin - A
M 17:4 SCTE35Base.__repr__ - A
M 20:4 SCTE35Base.as_90k - A
M 27:4 SCTE35Base.as_ticks - A
M 48:4 SCTE35Base.get - A
M 54:4 SCTE35Base.get_json - A
C 9:0 BitBin - A
M 30:4 BitBin.as_int - A
M 47:4 BitBin.as_charset - A
C 99:0 NBin - A
M 133:4 NBin.add_int - A
M 170:4 NBin.reserve - A
246 blocks (classes, functions, methods) analyzed.
Average complexity: A (1.9024390243902438)
Upvotes: 1
Reputation: 14255
Here's another one, using pathlib. It lists individual (relative) file paths with their line counts, the total number of files, and the total line count.
import pathlib
class LoC(object):
    """Count lines in files below a directory, printing one line per file.

    Class attributes:
        suffixes: File suffixes that are counted.
        skip: Names of files or directories that are ignored.

    After ``count`` has run, ``self.files`` holds the number of counted files
    and ``self.lines`` the total number of lines.
    """

    suffixes = ['.py']
    skip = ['name of dir or file to skip', ...]

    def count(self, path, init=True):
        """Recursively count lines below ``path``.

        Args:
            path: File or directory to process (``str`` or ``pathlib.Path``).
            init: True for the outermost call; resets the counters, records
                ``path`` as the root for relative display, and prints the
                final summary. The recursion passes False.
        """
        path = pathlib.Path(path)
        if init:
            # Reset before anything else so that the skip message below can
            # always refer to self.root, even when the root itself is skipped.
            self.root = path
            self.files = 0
            self.lines = 0
        if path.name in self.skip:
            print(f'skipped: {path.relative_to(self.root)}')
            return
        if path.is_dir():
            # recursive case
            for item in path.iterdir():
                self.count(path=item, init=False)
        elif path.is_file() and path.suffix in self.suffixes:
            # base case
            with path.open(mode='r') as f:
                line_count = len(f.readlines())
            print(f'{path.relative_to(self.root)}: {line_count}')
            self.files += 1
            self.lines += line_count
        if init:
            print(f'\n{self.lines} lines in {self.files} files')
Note: I omitted the __init__ method for clarity.
Usage example:
# Create the counter; calling loc.count(<directory>) then prints per-file counts and the totals.
loc = LoC()
Upvotes: 3
Reputation: 126
This is derived from Daniel's answer (though refactored enough that this won't be obvious). That one doesn't recurse through subdirectories, which is the behavior I wanted.
from os import listdir
from os.path import isfile, isdir, join
def item_line_count(path):
    """Return the number of lines found at ``path``.

    A directory is counted recursively via ``dir_line_count``; a regular file
    contributes its own line count; anything else contributes 0.
    """
    if isdir(path):
        return dir_line_count(path)
    if isfile(path):
        # Binary mode avoids decoding errors; the context manager makes sure
        # the handle is closed instead of being left to the garbage collector.
        with open(path, 'rb') as handle:
            return sum(1 for _ in handle)
    return 0
def dir_line_count(dir):
    """Return the total line count of every entry inside directory ``dir``.

    Each entry is delegated to ``item_line_count``, which recurses into
    sub-directories.
    """
    return sum(item_line_count(join(dir, item)) for item in listdir(dir))
Upvotes: 3
Reputation: 1729
from os import listdir
from os.path import isfile, join
def countLinesInPath(path,directory):
    """Return the number of lines in the file ``path`` inside ``directory``.

    The file is read as UTF-8 text and is closed before returning.
    """
    with open(join(directory,path), encoding="utf8") as f:
        return sum(1 for _ in f)
def countLines(paths,directory):
    """Return the combined line count of the files ``paths`` in ``directory``."""
    return sum(countLinesInPath(path,directory) for path in paths)
def getPaths(directory):
    """Return the names of the regular files directly inside ``directory``.

    Sub-directories are not listed and are not descended into.
    """
    return [f for f in listdir(directory) if isfile(join(directory, f))]
def countIn(directory):
    """Return the total line count of the files directly inside ``directory``."""
    return countLines(getPaths(directory),directory)
To count all the lines of code in the files in a directory, call the "countIn" function, passing the directory as a parameter.
Upvotes: 3
Reputation: 21
If you want to count how many lines are in your project, create a script inside of your project folder and paste the following into it:
import os
# Root directory handed to _walk() as the starting point of the scan.
directory = "[project_directory]"
directory_depth = 100 # How deep you would like to go
extensions_to_consider = [".py", ".css"] # Change to ["all"] to include all extensions
# Entries whose path ends with one of these names are skipped by _walk().
exclude_filenames = ["venv", ".idea", "__pycache__", "cache"]
# The list of files that failed to be counted is printed only when this is False.
skip_file_error_list = True
# Resolved path of this script, so that it can be left out of the count.
this_file_dir = os.path.realpath(__file__)
print("Path to ignore:", this_file_dir)
def _walk(path, depth):
    """Recursively list files and directories up to a certain depth.

    Yields the path of every entry below ``path`` whose path does not end with
    one of the names in the module-level ``exclude_filenames``. Excluded
    entries are reported on stdout and, if they are directories, are not
    descended into.

    Args:
        path: Directory to scan.
        depth: Number of directory levels to list; ``1`` lists only the
            direct children of ``path``.
    """
    depth -= 1
    with os.scandir(path) as p:
        for entry in p:
            if any(entry.path.endswith(fName) for fName in exclude_filenames):
                print("Skipping entry", entry.path)
                continue
            yield entry.path
            if entry.is_dir() and depth > 0:
                yield from _walk(entry.path, depth)
# Collect every candidate path first so that progress can be shown as a percentage.
print("Caching entries")
files = list(_walk(directory, directory_depth))

print("Counting Lines")
file_err_list = []
line_count = 0
len_files = len(files)
for i, file_dir in enumerate(files):
    # Never count this script itself.
    if file_dir == this_file_dir:
        print("=[Rejected file directory", file_dir, "]=")
        continue

    # _walk() also yields directories; only regular files are counted.
    if not os.path.isfile(file_dir):
        continue

    # A file is counted when it matches any configured extension, or when the
    # special entry "all" is present.
    skip_File = True
    for ending in extensions_to_consider:
        if file_dir.endswith(ending) or ending == "all":
            skip_File = False

    if not skip_File:
        # NOTE(review): the error handling was missing from this snippet. It is
        # reconstructed from the otherwise unused file_err_list — confirm the
        # exception types that should be tolerated.
        try:
            local_count = 0
            with open(file_dir, "r") as file:
                for line in file:
                    # Blank lines are not counted.
                    if line != "\n":
                        local_count += 1
        except (OSError, UnicodeDecodeError):
            file_err_list.append(file_dir)
            continue
        print("({:.1f}%)".format(100*i/len_files), file_dir, "|", local_count)
        line_count += local_count

print("File Count Errors:", len(file_err_list))
if not skip_file_error_list:
    for file in file_err_list:
        print(file)

print("Total lines |", line_count)
There's probably faster and more efficient ways to do this, but this is a nice start.
directory is the project directory you want to be counted.
directory_depth is how deep within the project infrastructure the scan goes,
i.e. a depth of 3 means it will only scan entries up to three levels below the project directory.
extensions_to_consider is the list of file extensions to count code for. If you only want to count .py files, set extensions_to_consider = [".py"].
exclude_filenames is an array of file names (and directories) you don't want the script to count code for.
skip_file_error_list is a boolean variable. If you wish to see a printout of all errors while counting, set it to False. Otherwise set it to True.
Run the script with the Python interpreter, for example from a terminal.
Upvotes: 2
Reputation: 19776
Based on Bryce93's answer, with a code_only option to exclude comments, docstrings, and empty lines from the line count:
import os
def countlines(rootdir, total_lines=0, header=True, begin_start=None,
               code_only=True):
    """Recursively count the lines of all ``.py`` files below ``rootdir``.

    One row per file (lines added, running total, relative path) is printed.

    Args:
        rootdir: Directory to scan.
        total_lines: Running total carried through the recursion.
        header: Print the table header first (outermost call only).
        begin_start: Directory that printed paths are shown relative to; set
            by the recursion, leave as None when calling directly.
        code_only: When true, blank lines, ``#`` comment lines and
            triple-double-quoted blocks are excluded from the count.
            NOTE(review): the default was cut off in this snippet; True is
            assumed — confirm.

    Returns:
        The updated running total.
    """
    def _get_new_lines(source):
        """Return the number of lines in ``source`` that count as code.

        Any line containing an odd number of ``\"\"\"`` opens a block that runs
        to the next line containing ``\"\"\"``; triple-quoted strings used as
        values are therefore excluded as well, and ``'''`` is not recognised.
        """
        total = len(source)
        i = 0
        while i < len(source):
            # strip() (not lstrip(" ")) so that "\n" and tab-indented lines are
            # recognised as blank lines / comments.
            trimline = source[i].strip()
            if trimline == '' or trimline.startswith('#'):
                total -= 1
            elif '"""' in trimline:  # docstring begin
                if trimline.count('"""') % 2 == 0:  # docstring ends on same line
                    total -= 1
                else:
                    doc_start = i
                    i += 1
                    # Bounds check: an unterminated docstring runs to the end
                    # of the file instead of raising IndexError.
                    while i < len(source) and '"""' not in source[i]:
                        i += 1
                    doc_end = min(i, len(source) - 1)
                    total -= (doc_end - doc_start + 1)
            i += 1
        return total

    if header:
        print('{:>10} |{:>10} | {:<20}'.format('ADDED', 'TOTAL', 'FILE'))
        print('{:->11}|{:->11}|{:->20}'.format('', '', ''))

    # Show paths relative to the directory of the outermost call, at every depth.
    display_root = rootdir if begin_start is None else begin_start

    for name in os.listdir(rootdir):
        file = os.path.join(rootdir, name)
        if os.path.isfile(file) and file.endswith('.py'):
            with open(file, 'r') as f:
                source = f.readlines()

            if code_only:
                new_lines = _get_new_lines(source)
            else:
                new_lines = len(source)
            total_lines += new_lines

            reldir_of_file = '.' + file.replace(display_root, '')

            print('{:>10} |{:>10} | {:<20}'.format(
                new_lines, total_lines, reldir_of_file))

    # Sub-directories are processed after the files of the current level.
    for file in os.listdir(rootdir):
        file = os.path.join(rootdir, file)
        if os.path.isdir(file):
            total_lines = countlines(file, total_lines, header=False,
                                     begin_start=display_root,
                                     code_only=code_only)
    return total_lines
Upvotes: 1
As an addition to the pygount answer: the option --format=summary was recently added to get the total number of lines per file type in a directory.
pygount --format=summary ./your-directory
could output something like:
Language Code % Comment %
------------- ---- ------ ------- ------
XML 1668 48.56 10 0.99
Python 746 21.72 150 14.90
TeX 725 21.11 57 5.66
HTML 191 5.56 0 0.00
markdown 58 1.69 0 0.00
JSON 37 1.08 0 0.00
INI 10 0.29 0 0.00
Text 0 0.00 790 78.45
__duplicate__ 0 0.00 0 0.00
------------- ---- ------ ------- ------
Sum total 3435 1007
Upvotes: 38
Reputation: 381
pygount will display all the files in the folder, each with a count of code lines (excluding documentation).
pip install pygount
To list the results for a directory, run:
pygount ~/path_to_directory
Upvotes: 28
Reputation: 509
This has a slight air of homework assignment :-) -- nonetheless, it's a worthwhile exercise, and Bryce93's formatting is nice. I think many would be unlikely to use Python for this given that it can be done quickly with a couple of shell commands, for example:
cat $(find . -name "*.py") | grep -E -v '^\s*$|^\s*#' | wc -l
Note that none of these solutions accounts for multiline (''') comments.
Upvotes: 13
Reputation: 481
Here's a function I wrote to count all lines of code in a python package and print an informative output. It will count all lines in all .py files below the starting directory.
import os
def countlines(start, lines=0, header=True, begin_start=None):
    """Recursively count the lines of all ``.py`` files below ``start``.

    One row per file (lines added, running total, relative path) is printed.

    Args:
        start: Directory to scan.
        lines: Running total carried through the recursion.
        header: Print the table header first (outermost call only).
        begin_start: Directory that printed paths are shown relative to; set
            by the recursion, leave as None when calling directly.

    Returns:
        The updated running total.
    """
    if header:
        print('{:>10} |{:>10} | {:<20}'.format('ADDED', 'TOTAL', 'FILE'))
        print('{:->11}|{:->11}|{:->20}'.format('', '', ''))

    # Show paths relative to the directory of the outermost call, at every
    # depth (passing the immediate parent would hide intermediate folders).
    display_root = start if begin_start is None else begin_start

    for thing in os.listdir(start):
        thing = os.path.join(start, thing)
        if os.path.isfile(thing) and thing.endswith('.py'):
            with open(thing, 'r') as f:
                newlines = sum(1 for _ in f)
            lines += newlines

            reldir_of_thing = '.' + thing.replace(display_root, '')

            print('{:>10} |{:>10} | {:<20}'.format(
                newlines, lines, reldir_of_thing))

    # Sub-directories are processed after the files of the current level.
    for thing in os.listdir(start):
        thing = os.path.join(start, thing)
        if os.path.isdir(thing):
            lines = countlines(thing, lines, header=False,
                               begin_start=display_root)

    return lines
To use it, just pass the directory you'd like to start in. For example, to count the lines of code in some package foo, call countlines with the path to the foo directory.
Which would output something like:
5 | 5 | .\
539 | 578 | .\
558 | 1136 | .\baz\
Upvotes: 38