Reputation: 74
I have a more than 1000 .csv
files (data_1.csv......data1000.csv), each containing X and Y values!
x1 y1 x2 y2
5.0 60 5.5 500
6.0 70 6.5 600
7.0 80 7.5 700
8.0 90 8.5 800
9.0 100 9.5 900
I have made a subplot program in python which can give two plots (plot1 - X1vsY1, Plot2 - X2vsY2) at a time using one file.
I need help in looping all the files, (open a file, read it, plot it, pick another file, open it, read it, plot it, ... until all the files in a folder get plotted)
I have the following code:
import pandas as pd
import matplotlib.pyplot as plt
df1=pd.read_csv("data_csv",header=1,sep=',')
fig = plt.figure()
plt.subplot(2, 1, 1)
plt.plot(df1.iloc[:,[1]],df1.iloc[:,[2]])
plt.subplot(2, 1, 2)
plt.plot(df1.iloc[:,[3]],df1.iloc[:,[4]])
plt.show()
How can this be accomplished more efficiently?
Upvotes: 4
Views: 13446
Reputation: 62403
p = Path(...)
: p
→ WindowsPath('so_data/files')
files = p.rglob(...)
yields all files matching the pattern
file[0]
→ WindowsPath('so_data/files/data_1.csv')
p.parent / 'plots' / f'{file.stem}.png'
→ WindowsPath('so_data/plots/data_1.png')
p.parent
→ WindowsPath('so_data')
file.stem
→ data_1
pandas
, as does the OP.pandas.DataFrame.plot
, which uses matplotlib
as the default backend.
.iloc
to specify the columns, and then x=0
will always be the x-axis data, based on the given example data.python 3.8.11
, pandas 1.3.2
, matplotlib 3.4.3
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
p = Path('so_data/files') # specify the path to the files
files = p.rglob('data_*.csv') # generator for all files based on rglob pattern
for file in files:
df = pd.read_csv(file, header=0, sep=',') # specify header row and separator as needed
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(7, 5))
df.iloc[:, [0, 1]].plot(x=0, ax=ax1) # plot 1st x/y pair; assumes x data is at position 0
df.iloc[:, [2, 3]].plot(x=0, ax=ax2) # plot 2nd x/y pair; assumes x data is at position 0
fig.savefig(p.parent / 'plots' / f'{file.stem}.png')
plt.close(fig) # close each figure, otherwise they stay in memory
so_data/files
directory manually.df = pd.DataFrame({'x1': [5.0, 6.0, 7.0, 8.0, 9.0], 'y1': [60, 70, 80, 90, 100], 'x2': [5.5, 6.5, 7.5, 8.5, 9.5], 'y2': [500, 600, 700, 800, 900]})
for x in range(1, 1001):
df.to_csv(f'so_data/files/data_{x}.csv', index=False)
df.column
creates an array of columns, that can be chunked into pairs
list(zip(*[iter(df.columns)]*2))
→ [('x1', 'y1'), ('x2', 'y2')]
.loc
, since there will be column names, instead of .iloc
for column indices.p = Path('so_data/files')
files = p.rglob('data_*.csv')
for file in files:
df = pd.read_csv(file, header=0, sep=',')
col_pair = list(zip(*[iter(df.columns)]*2)) # extract column pairs
fig, axes = plt.subplots(len(col_pair), 1) # a number of subplots based on number of col_pairs
axes = axes.ravel() # flatten the axes if necessary
for cols, ax in zip(col_pair, axes):
df.loc[:, cols].plot(x=0, ax=ax) # assumes x data is at position 0
fig.savefig(p.parent / 'plots' / f'{file.stem}.png')
plt.close(fig)
Upvotes: 0
Reputation: 124
If for some reason @Neill Herbst answer didnt work as expected (i consider the easiest way) I run with a problem reading the files I rearrenged the code that worked for me
import glob
import pandas as pd
import matplotlib.pyplot as plt
os.chdir(r'path')
for file in glob.glob("*.csv")::
df1=pd.read_csv(file,header=1,sep=',')
fig = plt.figure()
plt.subplot(2, 1, 1)
plt.plot(df1.iloc[:,[1]],df1.iloc[:,[2]])
plt.subplot(2, 1, 2)
plt.plot(df1.iloc[:,[3]],df1.iloc[:,[4]])
plt.show() # plot one csv when you close it, plots next one
#plt.show <------ if u want to see all the plots in different windows
Upvotes: 0
Reputation: 26
I used NetCDF(.nc) just in case anyone is interested in using NetCDF data. Also, you could replace it with .txt too, the idea is the same. I used this for a contour plot loop.
path_to_folder='#type the path to the files'
count=0
fig = plt.figure(figsize=(10,5))
files = []
for i in os.listdir(path_to_folder):
if i.endswith('.nc'):
count=count+1
files.append(open(i))
data=xr.open_dataset(i)
prec=data['tp']
plt.subplot(1, 2, count) # change 1 and 2 to the shape you want
prec.groupby('time.month').mean(dim=('time','longitude')).T.plot.contourf(cmap='Purples') *#this is to plot contour plot but u can replace with any plot command
print(files)
plt.savefig('try,png',dpi=500,orientation='landscape',format='png')
Upvotes: 1
Reputation: 2122
You can generate a list of filenames using glob
and then plot them in a for loop.
import glob
import pandas as pd
import matplotlib.pyplot as plt
files = glob.glob(# file pattern something like '*.csv')
for file in files:
df1=pd.read_csv(file,header=1,sep=',')
fig = plt.figure()
plt.subplot(2, 1, 1)
plt.plot(df1.iloc[:,[1]],df1.iloc[:,[2]])
plt.subplot(2, 1, 2)
plt.plot(df1.iloc[:,[3]],df1.iloc[:,[4]])
plt.show() # this wil stop the loop until you close the plot
Upvotes: 4
Reputation:
What we want to do is for each iteration, or file, create a new empty list. So for each iteration the data will be plotted, but once that data has been plotted a new empty list will be created, and plotted. Once all the data from each file has been plotted, then you want to finally to plt.show() which will show all the plots together. Here is a link to a similar problem I was having: Traceback lines on plot of multiple files. Goog luck!
import csv
import matplotlib.pyplot as plt
def graphWriter():
for file in os.listdir(os.getcwd()):
List1 = []
List2 = []
List3 = []
List4 = []
with open(filename, 'r') as file:
for col in csv.DictReader(file):
List1.append(col['x1'])
List2.append(col['y1'])
List3.append(col['x2'])
List4.append(col['y2'])
plt.subplot(2, 1, 1)
plt.grid(True)
colors = np.random.rand(2)
plt.plot(List1,List2,c=colors)
plt.tick_params(axis='both', which='major', labelsize=8)
plt.subplot(2, 1, 2)
plt.grid(True)
colors = np.random.rand(2)
plt.plot(List1,List3,c=colors)
plt.tick_params(axis='both', which='major', labelsize=8)
plt.show()
plt.gcf().clear()
plt.close('all')
Upvotes: 0
Reputation: 74
# plotting all the file data and saving the plots
import os
import csv
import matplotlib.pyplot as plt
def graphWriterIRIandRut():
m = 0
List1 = []
List2 = []
List3 = []
List4 = []
fileList = []
for file in os.listdir(os.getcwd()):
fileList.append(file)
while m < len(fileList):
for col in csv.DictReader(open(fileList[m],'rU')):
List1.append(col['x1'])
List2.append(col['y1'])
List3.append(col['x2'])
List4.append(col['y2'])
plt.subplot(2, 1, 1)
plt.grid(True)
# colors = np.random.rand(2)
plt.plot(List1,List2,c=colors)
plt.tick_params(axis='both', which='major', labelsize=8)
plt.subplot(2, 1, 2)
plt.grid(True)
# colors = np.random.rand(2)
plt.plot(List1,List3,c=colors)
plt.tick_params(axis='both', which='major', labelsize=8)
m = m + 1
continue
plt.show()
plt.gcf().clear()
plt.close('all')
Upvotes: 0
Reputation:
Here is the basic setup for what am using here at work. This code will plot the data from each file and through each file separately. This will work on any number of files as long as column names remain the same. Just direct it to the proper folder.
import os
import csv
def graphWriterIRIandRut():
m = 0
List1 = []
List2 = []
List3 = []
List4 = []
fileList = []
for file in os.listdir(os.getcwd()):
fileList.append(file)
while m < len(fileList):
for col in csv.DictReader(open(fileList[m],'rU')):
List1.append(col['Col 1 Name'])
List2.append(col['Col 2 Name'])
List3.append(col['Col 3 Name'])
List4.append(col['Col 4 Name'])
plt.subplot(2, 1, 1)
plt.grid(True)
colors = np.random.rand(n)
plt.plot(List1,List2,c=colors)
plt.tick_params(axis='both', which='major', labelsize=8)
plt.subplot(2, 1, 2)
plt.grid(True)
colors = np.random.rand(n)
plt.plot(List1,List3,c=colors)
plt.tick_params(axis='both', which='major', labelsize=8)
m = m + 1
continue
plt.show()
plt.gcf().clear()
plt.close('all')
Upvotes: 0