Reputation: 613
I'm working on creating a Python-generated report that uses Pandas DataFrames. Currently I am using the DataFrame.to_string()
method. However, this writes to the file as a plain string. Is there a way for me to achieve this while keeping it as a table, so I can use table formatting?
Code:
# Query the client for the organic keyword data and load it into a DataFrame.
SEMorgkeys = client.domain_organic(url, database="us", display_limit=10, export_columns=["Ph,Pp,Pd,Nq,Cp,Ur,Tr"])
org_df = pd.DataFrame(SEMorgkeys)

# Use a context manager so the file is closed even if to_string() or a
# write raises part-way through.
with open(name, 'w') as f:
    f.write("\nOrganic:\n")
    f.write(org_df.to_string(index=False, justify="left"))
Current Printout (as string):
CPC Keyword Position Difference Previous Position Search Volume Traffic (%) Url
75.92 small business factoring 0 1 210 11.69 https://www..com/small-business-f...
80.19 factoring company 0 8 1600 5.72 https://www..com/factoring-vs-ban...
Upvotes: 32
Views: 74212
Reputation: 21
It could also be done like this:
import pandas as pd
from docx import Document
# init the word doc
document = Document()

# create the df
df = pd.DataFrame(data)

# Pick the columns to export. The header and the data rows are both taken
# from `selected`, so choosing a subset keeps them aligned.
selected = df  # to write all columns
# selected = df[['col1', 'col4']]  # to write only a subset of columns
df_columns = selected.columns

# add the table in the word doc (one row for the header; data rows are appended)
table = document.add_table(rows=1, cols=len(df_columns))

# write the table header
table_header = table.rows[0].cells
for idx, this_col in enumerate(df_columns):
    # Column labels may be ints, tuples, etc.; cell text must be a string.
    table_header[idx].text = str(this_col)

# write the table data
for this_row in selected.values.tolist():
    row_cells = table.add_row().cells
    for idx, this_cell in enumerate(this_row):
        row_cells[idx].text = str(this_cell)

document.save('mydoc.docx')
Upvotes: 0
Reputation: 116
You could try:
# Export the DataFrame to an Excel workbook at "file_name.xlsx".
df.to_excel("file_name.xlsx")
Then just open it and copy the required cells into your Word document.
Upvotes: 1
Reputation: 366
Inspired by the answers above, I have added a function with the ability to include the index.
import docx
import pandas as pd
from pathlib import Path
def pd_table_to_word(df, save_to_path, include_index=False):
    """Write a DataFrame into a new Word document as a table.

    The table has one header row holding the column labels, followed by one
    row per DataFrame row. When ``include_index`` is true, an extra leading
    column holds the index name (header row) and the index labels.

    If ``save_to_path`` already exists, the user is asked on stdin to confirm
    before it is overwritten.

    Args:
        df: The DataFrame to export.
        save_to_path: Destination path of the .docx file.
        include_index: Whether to write the index as the first column.

    Returns:
        A status message: either that the table was saved, or that the
        overwrite was aborted.
    """
    if Path(save_to_path).exists():
        response = input("Document already exists and will be overwritten. Sure you want to overwrite this documents? Y/ N")
        if response.strip().lower() not in ["y", "ye", "yes", "yeah"]:
            return "Aborted overwriting file."

    doc = docx.Document()

    n_data_rows, n_data_cols = df.shape
    # The header row is always needed; the extra column only when the index
    # is written. (Previously the sizes were swapped: without the index the
    # table was one row short and had a spare empty column.)
    col_offset = 1 if include_index else 0
    t = doc.add_table(n_data_rows + 1, n_data_cols + col_offset)

    # Header row. Labels are stringified because cell text must be a str.
    for j, column in enumerate(df.columns):
        t.cell(0, j + col_offset).text = str(column)

    # Index column: its name (blank when the index is unnamed), then labels.
    if include_index:
        index_name = df.index.name
        t.cell(0, 0).text = "" if index_name is None else str(index_name)
        for i, label in enumerate(df.index):
            t.cell(i + 1, 0).text = str(label)

    # Materialise the values once; accessing df.values inside the loop
    # rebuilds the array for every cell.
    values = df.values
    for i in range(n_data_rows):
        for j in range(n_data_cols):
            t.cell(i + 1, j + col_offset).text = str(values[i, j])

    doc.save(save_to_path)
    return f"Table saved to {save_to_path}"
Upvotes: 1
Reputation: 91
Use this and improve it if you like:
from docx import Document
import pandas as pd
def df_to_word(data: dict, report_name: str) -> "docx.document.Document":
    """Build a Word document containing ``data`` as a table and save it.

    ``data`` is turned into a DataFrame; the table gets one header row with
    the column labels followed by one row per DataFrame row. The document is
    saved as ``./<report_name>``.

    Args:
        data: Mapping passed to ``pd.DataFrame`` (column label -> values).
        report_name: File name of the report; must end in ``.docx``.

    Returns:
        The document object that was saved.

    Raises:
        AssertionError: If ``data`` is not a dict or ``report_name`` does not
            end in ``.docx``.
    """
    # The surrounding snippet only does `from docx import Document`, which
    # does not bind the name `docx` used below, so import it locally.
    import docx

    assert isinstance(data, dict), 'data has to be dict'
    assert report_name.lower().endswith('.docx'), 'report_name has to be a .docx file'

    df = pd.DataFrame(data)
    n_rows, n_cols = df.shape

    doc = docx.Document()
    table = doc.add_table(n_rows + 1, n_cols)

    # Header row. Labels are stringified because cell text must be a str.
    for j, column in enumerate(df.columns):
        table.cell(0, j).text = str(column)

    # Materialise the values once instead of rebuilding them for every cell.
    values = df.values
    for i in range(n_rows):
        for j in range(n_cols):
            table.cell(i + 1, j).text = str(values[i, j])

    doc.save(f'./{report_name}')
    return doc
# Sample input: each key is a column label, each value that column's cells.
# Keys must be unique; a repeated key in a dict literal silently overrides
# the earlier entry, so the duplicate "calorierbes" entry was dropped.
data = {
    "calorierbes": [420, 380, 390],
    "duratierbn": [50, 40, 45],
    "durationverg": [50, 40, 45],
    "duratiorgern": [50, 40, 45],
    "calorieers": [420, 380, 390],
    "calorierbers": [420, 380, 390],
}
df_to_word(data, 'report_4.docx')
Upvotes: 1
Reputation: 568
def doctable(data, tabletitle, pathfile):
    """Save ``data`` as a titled table in a new Word document.

    The page is set to 297 mm x 210 mm with 20 mm margins and 12.7 mm
    header/footer distances. ``data`` is converted with ``pd.DataFrame`` and
    every row of the resulting frame becomes one table row; no separate
    header row is added, so the first row of ``data`` is expected to carry
    the headers.

    Args:
        data: Table content (the original author passes a 2D list).
        tabletitle: Heading text placed above the table.
        pathfile: Path the document is saved to.

    Returns:
        Always 0.
    """
    import pandas as pd
    from docx import Document
    from docx.shared import Mm, Pt

    document = Document()

    # Page geometry in millimetres, applied in this order to the first section.
    page = document.sections[0]
    page_setup = {
        "page_height": 297,
        "page_width": 210,
        "left_margin": 20,
        "right_margin": 20,
        "top_margin": 20,
        "bottom_margin": 20,
        "header_distance": 12.7,
        "footer_distance": 12.7,
    }
    for attribute, millimetres in page_setup.items():
        setattr(page, attribute, Mm(millimetres))

    frame = pd.DataFrame(data)  # My input data is in the 2D list form
    n_rows, n_cols = frame.shape

    document.add_heading(tabletitle)
    table = document.add_table(rows=n_rows, cols=n_cols)  # First row are table headers!
    table.allow_autofit = True
    table.autofit = True

    # Fill the table column by column; cells are addressed as (row, column).
    for col_pos, label in enumerate(frame):
        series = frame[label]
        for row_pos in range(n_rows):
            table.cell(row_pos, col_pos).text = str(series[row_pos])

    document.save(pathfile)
    return 0
Upvotes: 7
Reputation: 36608
You can write the table straight into a .docx
file using the python-docx
library.
If you are using Conda, or installed Python using Anaconda, you can run this command from the command line:
conda install python-docx --channel conda-forge
Or to pip install from the command line:
pip install python-docx
After that is installed, we can use it to open the file, add a table, and then populate the table's cell text with the data frame data.
import docx
import pandas as pd
# i am not sure how you are getting your data, but you said it is a
# pandas data frame
df = pd.DataFrame(data)
n_rows, n_cols = df.shape

# open an existing document
doc = docx.Document('./test.docx')

# add a table to the end and create a reference variable
# extra row is so we can add the header row
t = doc.add_table(n_rows + 1, n_cols)

# add the header row; labels are stringified because cell text must be a str
for j, column in enumerate(df.columns):
    t.cell(0, j).text = str(column)

# add the rest of the data frame; the values are materialised once rather
# than rebuilt through df.values for every cell
values = df.values
for i in range(n_rows):
    for j in range(n_cols):
        t.cell(i + 1, j).text = str(values[i, j])

# save the doc
doc.save('./test.docx')
Upvotes: 59