Reputation: 23
I am trying to delete every slide that contains a specific text ('apple') from all the .pptx files in multiple directories. The code below does remove those slides.
import os
import glob
import pptx
def delete_slide_with_name(presentation, name):
    """Build a new presentation holding only the slides that do not mention *name*.

    A slide is skipped when any text run in any of its text-frame shapes
    contains *name* as a substring. For every remaining slide, a slide is
    added to a fresh ``pptx.Presentation()`` using the source slide's layout,
    and the source shapes' XML elements are inserted into the new slide's
    shape tree before ``p:extLst``.

    Returns the newly built presentation; *presentation* is not saved here.
    """
    # NOTE(review): files produced this way reportedly need repair when
    # opened. Presumably only the shape XML is carried over, not what the
    # shapes refer to -- confirm before relying on this approach.
    rebuilt = pptx.Presentation()
    for slide in presentation.slides:
        mentions_name = any(
            name in run.text
            for shape in slide.shapes
            if shape.has_text_frame
            for paragraph in shape.text_frame.paragraphs
            for run in paragraph.runs
        )
        if mentions_name:
            continue
        target = rebuilt.slides.add_slide(slide.slide_layout)
        for source_shape in slide.shapes:
            target.shapes._spTree.insert_element_before(
                source_shape.element, 'p:extLst'
            )
    return rebuilt
path = "C:/Study/Thesis/main_project_folder/Test_Sensitive_Data_Script"

# Visit every directory below `path` and overwrite each .pptx file with the
# presentation returned by delete_slide_with_name().
for folder, _subfolders, filenames in os.walk(path):
    for filename in filenames:
        if not filename.endswith(".pptx"):
            continue
        file_path = os.path.join(folder, filename)
        presentation = pptx.Presentation(file_path)
        new_presentation = delete_slide_with_name(presentation, "apple")
        new_presentation.save(file_path)
But the problem is that, although the code deletes the slide, it does not produce a readable file. When I try to open the file, a prompt pops up saying that the file needs to be repaired. After the repair, the content structure of the file has changed.
Before:
After:
How can I reproduce all the slides exactly as they were, without the deleted slide?
Upvotes: 0
Views: 380
Reputation: 372
Is there a reason you don't just delete the slides rather than adding the slides you want to a new presentation?
import os
import glob
import pptx
def _slide_mentions(slide, name):
    """Return True if any text run on *slide* contains *name* as a substring."""
    return any(
        name in run.text
        for shape in slide.shapes
        if shape.has_text_frame
        for paragraph in shape.text_frame.paragraphs
        for run in paragraph.runs
    )


def delete_slide_with_name(presentation, name):
    """Remove, in place, every slide that has a text run containing *name*.

    Args:
        presentation: An open python-pptx presentation; it is modified
            directly and must be saved by the caller.
        name: Substring to look for in each run of each text-frame shape.

    Returns:
        None.
    """
    slide_ids = presentation.slides._sldIdLst
    # Snapshot the <p:sldId> entries and decide what to delete *before*
    # mutating anything, so positional lookups in presentation.slides stay
    # aligned with the snapshot.
    entries = list(slide_ids)
    doomed = [
        entry
        for index, entry in enumerate(entries)
        if _slide_mentions(presentation.slides[index], name)
    ]
    # Each slide appears at most once in `doomed`, so a slide with several
    # matching runs is removed once; a second remove() would raise ValueError.
    for entry in doomed:
        # Drop the presentation->slide relationship as well; otherwise the
        # slide part is still written into the saved file even though it is
        # no longer listed in the slide show.
        presentation.part.drop_rel(entry.rId)
        slide_ids.remove(entry)
path = "C:\\Users\\lukeb\\Downloads\\test\\"

# Visit every directory below `path`; each .pptx file is opened, stripped of
# the slides mentioning "apple", and saved back over the original file.
for folder, _subfolders, filenames in os.walk(path):
    for filename in filenames:
        if not filename.endswith(".pptx"):
            continue
        file_path = os.path.join(folder, filename)
        presentation = pptx.Presentation(file_path)
        delete_slide_with_name(presentation, "apple")
        presentation.save(file_path)
Upvotes: 1