Reputation: 95
I am trying to find the largest file in a directory structure so I can then use that info to help in the creation of a database.
Here is the code:
import os
import datetime
def get_files(target):
    """Collect, print and return the size of every file under *target*.

    Walks the directory tree rooted at ``target`` without descending into
    mount points and records a ``(filename, size)`` tuple for each file
    whose size can be read. The records are sorted by size in ascending
    order, so the largest file is the last element.

    Args:
        target: Path of the directory to scan.

    Returns:
        A list of ``(filename, size_in_bytes)`` tuples sorted by size. The
        list is empty when no readable file is found.
    """
    # Initialize files list
    filelist = []
    # Walk the directory structure
    for root, dirs, files in os.walk(target):
        # Do not walk into directories that are mount points. Assigning to
        # the slice edits the very list os.walk uses to decide where to
        # descend next.
        dirs[:] = [
            d for d in dirs if not os.path.ismount(os.path.join(root, d))
        ]
        for name in files:
            # Construct the full path for the file
            filename = os.path.join(root, name)
            try:
                # File size information in bytes
                size = float(os.path.getsize(filename))
            except OSError:
                # Broken symlink, or the file disappeared or became
                # unreadable while walking: skip it instead of crashing.
                continue
            # Get the modified time of the file
            #mtime = os.path.getmtime(filename)
            # Add the (filename, size) tuple to the master filelist; the
            # modified time could be appended as a third element via
            # str(datetime.datetime.fromtimestamp(mtime)).
            filelist.append((filename, size))
    # Sort the records themselves (not a list wrapping them) by size so the
    # largest file ends up last.
    filelist.sort(key=lambda entry: entry[1])
    print(filelist)
    # with open("/home/dave/sizes.txt", 'w') as size_file:
    # contents = filelist.readline()
    return filelist


get_files("/home/dave/TL/case")
As you can see, I am a newbie and not sure how to pass the results of the function to a file.
My ultimate goal is just to find the largest file and its size. It can go to a file or to stdout.
What am I missing?
Upvotes: 1
Views: 1329
Reputation: 180401
Just make your function a generator function and call max, using the file size as the key with itemgetter(1):
import os
def get_files(target):
    """Yield ``(filename, size_in_bytes)`` for every file below *target*.

    The directory tree rooted at ``target`` is walked lazily and mount
    points are not entered. Files whose size cannot be read are skipped.

    Args:
        target: Path of the directory to scan.

    Yields:
        Tuples of the file's path (``root`` joined with its name) and its
        size in bytes.
    """
    for root, dirs, files in os.walk(target):
        # Do not walk into directories that are mount points. The slice
        # assignment edits the list os.walk consults for its next descent.
        dirs[:] = [
            d for d in dirs if not os.path.ismount(os.path.join(root, d))
        ]
        for name in files:
            # Construct the full path for the file
            filename = os.path.join(root, name)
            try:
                # File size information in bytes
                size = os.path.getsize(filename)
            except OSError:
                # Broken symlink, or the file vanished after being listed.
                # Asking for the size directly avoids the gap between a
                # separate exists() check and the getsize() call.
                continue
            yield filename, size
That will allow you to reuse the function however you like:
In [5]: from operator import itemgetter
In [6]: max(get_files("."),key=itemgetter(1))
Out[6]:
('./node_modules/browser-sync/node_modules/socket.io/node_modules/socket.io-parser/bg.gif',
1277113)
If you wanted to sort the files by name alphabetically:
sorted(get_files("path"))
Sort by size:
sorted(get_files("path"), key=itemgetter(1))
Upvotes: 2
Reputation: 1292
Here is one verbose way to do it. First I am creating a list of File Name and File Size tuples. Then I am iterating through the list and saving the largest file Name and Size.
import os
# Find the largest regular file directly inside the current directory.
# Step 1 builds a list of (file name, file size) tuples; step 2 scans that
# list and remembers the largest entry.
fileSizeTupleList = []
largestSize = 0
# Stays None until a file has been selected, so the final report never
# touches an unbound name when the directory has no files (or only
# zero-byte files).
largestFile = None
for i in os.listdir(os.curdir):
    # Skip directories and anything else that is not a regular file.
    if os.path.isfile(i):
        fileSizeTupleList.append((i, os.path.getsize(i)))
for fileName, fileSize in fileSizeTupleList:
    # The first file is always accepted; after that only a strictly larger
    # one replaces it, so the earliest file wins a tie.
    if largestFile is None or fileSize > largestSize:
        largestSize = fileSize
        largestFile = fileName
if largestFile is None:
    print("No files found")
else:
    print(largestFile, largestSize)
Here is a recursive approach:
import os
# Find the largest file anywhere below the current directory.
# Step 1 walks the tree and builds a list of (file path, file size) tuples;
# step 2 scans that list and remembers the largest entry.
fileSizeTupleList = []
largestSize = 0
# Stays None until a file has been selected, so the final report never
# touches an unbound name when the tree has no files (or only zero-byte
# files).
largestFile = None
for root, dirs, files in os.walk(os.curdir):
    for name in files:
        # Keep the path including its directory: a bare file name is
        # ambiguous once sub-directories are involved.
        path = os.path.join(root, name)
        try:
            fileSizeTupleList.append((path, os.path.getsize(path)))
        except OSError:
            # Broken symlink, or the file vanished after being listed.
            continue
for fileName, fileSize in fileSizeTupleList:
    # The first file is always accepted; after that only a strictly larger
    # one replaces it, so the earliest file wins a tie.
    if largestFile is None or fileSize > largestSize:
        largestSize = fileSize
        largestFile = fileName
if largestFile is None:
    print("No files found")
else:
    print(largestFile, largestSize)
Upvotes: 0