Reputation: 1014
What I’m trying to accomplish:
Problem:
I can do this with non-CTL languages like English:
from docx import Document
from docx.enum.style import WD_STYLE_TYPE
from docx.shared import Pt
# Font settings requested by the user.
user_font_name = 'FreeMono'
user_font_size = 14

doc = Document()

# Register a custom paragraph style and apply the requested font to it
# directly through the style's font proxy.
my_style = doc.styles.add_style('style_name', WD_STYLE_TYPE.PARAGRAPH)
my_style.font.name = user_font_name
my_style.font.size = Pt(user_font_size)

# Western text picks up the style's font as expected.
p = doc.add_paragraph('some text', style=my_style)
# persian_p = doc.add_paragraph('نوشته', style=my_style)
# FreeMono supports the Persian language, so the font itself is not the problem.

doc.save('file.docx')
However, if I change the text to Persian, its font does not change to the specified font.
Why this happens:
How I know this:
Additional info:
Upvotes: 3
Views: 2746
Reputation: 1
I have solved the bold text issue: bold for complex-script text is controlled by <w:bCs>, but python-docx (run.bold = True) only sets <w:b>.
So I used your code to make a run that was already bolded with run.bold = True actually appear bold:
# run.bold = True only writes <w:b>; complex-script text is bolded through
# <w:bCs>, so that element is added to the run properties as well.
my_bold_run = para.runs[1]
rpr = my_bold_run.element.get_or_add_rPr()

# Reuse an existing <w:bCs> so running this code again does not stack
# duplicate elements inside <w:rPr>.
bCs = rpr.find(qn('w:bCs'))
if bCs is None:
    bCs = OxmlElement('w:bCs')
    rpr.append(bCs)

# w:val is an on/off value: the lowercase "true" is valid, the capitalised
# "True" is not.
bCs.set(qn('w:val'), "true")
I wrote a function that sets the font name, size and bold state of a run. I am still not sure whether setting the bidi language is necessary. Here it is:
[It supports paragraphs with mixed languages.]
def _get_or_add_rpr_child(rpr, tag):
    """Return the first ``tag`` child of ``rpr``, appending a new one if absent."""
    child = rpr.find(qn(tag))
    if child is None:
        child = OxmlElement(tag)
        rpr.append(child)
    return child


def fix_cs_formatting_runs(run_to_fix, user_cs_font_size, user_cs_font_name, user_is_bold):
    """Apply font name, size and (optionally) bold to a run for complex scripts.

    "cs" stands for complex script, e.g. Arabic. The same font name and size
    are written for both the complex-script and the western properties, so the
    function also works for runs in mixed-language paragraphs.

    Args:
        run_to_fix: the run whose ``<w:rPr>`` is edited in place.
        user_cs_font_size: font size in points; stored as half-points.
        user_cs_font_name: font name written to the ``cs``, ``ascii`` and
            ``hAnsi`` attributes of ``<w:rFonts>``.
        user_is_bold: when truthy, both ``<w:b>`` and ``<w:bCs>`` are turned
            on. When falsy, existing bold settings are left untouched.

    Existing child elements are reused instead of appended again, so calling
    the function repeatedly on the same run does not create duplicates.
    """
    rpr = run_to_fix.element.get_or_add_rPr()
    rFonts = rpr.get_or_add_rFonts()

    # sz / szCs hold the size in half-points, hence the factor of two.
    half_points = str(int(user_cs_font_size * 2))
    # Use the element returned by get_or_add_sz() rather than appending a
    # second <w:sz> next to it.
    sz = rpr.get_or_add_sz()
    sz.set(qn('w:val'), half_points)
    szCs = _get_or_add_rpr_child(rpr, 'w:szCs')
    szCs.set(qn('w:val'), half_points)

    # Right-to-left flag; drop any explicit w:val so a bare <w:rtl/> is left,
    # which is what this function has always emitted.
    rtl = _get_or_add_rpr_child(rpr, 'w:rtl')
    rtl.attrib.pop(qn('w:val'), None)

    lang = _get_or_add_rpr_child(rpr, 'w:lang')  # language
    lang.set(qn('w:bidi'), 'ar-SA')

    if user_is_bold:
        # w:bCs bolds the complex-script text, w:b bolds the western text.
        # The on/off value must be the lowercase "true"; "True" is not valid.
        for bold_tag in ('w:bCs', 'w:b'):
            _get_or_add_rpr_child(rpr, bold_tag).set(qn('w:val'), "true")

    rFonts.set(qn('w:cs'), user_cs_font_name)
    # You can use a different font for the other (western) language here.
    rFonts.set(qn('w:ascii'), user_cs_font_name)
    rFonts.set(qn('w:hAnsi'), user_cs_font_name)
Upvotes: 0
Reputation: 544
I had a similar problem and added the support to the docx library. The forked docx code is in https://github.com/Oritk/python-docx Usage:
# Add the text as a run, then set the complex-script properties through the
# attributes provided by the forked library.
run = p.add_run(line)
# run.font.size = Pt(8)  # redundant here, but harmless if left in
run.font.cs_size = Pt(8)
run.font.rtl = True
Upvotes: 0
Reputation: 1014
After many hours of poking around the docx file I realized, much to my horror, that the answer lay in the styles.xml file of the document. Here’s a way to fix it for people with similar problems:
Problems with Text Direction:
Xml explanation of the font changing problem:
The document with the altered default style shows a couple of differences in its styles.xml file. In the Normal paragraph style, under "w:rPr", there is an additional "w:szCs" element that determines the size of the complex script font (which you can’t change by changing style.font.size), and in "w:rFonts" the value of "cs" is now my specified Persian font. Also, the “bidi” value of "w:lang" is now “fa-IR” (for Persian). Here’s the xml part I’m talking about:
<w:rPr>
<w:rFonts w:ascii="FreeMono" w:hAnsi="FreeMono" w:cs="FreeFarsi"/>
<w:sz w:val="40"/>
<w:rtl/>
<w:cs/>
<w:szCs w:val="40"/>
<w:lang w:val="en-Us" w:bidi="fa-IR"/>
</w:rPr>
Now changing the style.font.size only changes "sz" value (western font size) and doesn’t do anything to "szCs" value (cs font size). And similarly style.font.name only changes "ascii" and "hAnsi" values of "w:rFonts" and doesn't do anything to "cs" value. So to change these values I had to change my style elements in python.
Solution:
from docx import Document
from docx.shared import Pt
# Path to the document whose default style was altered beforehand.
base_doc_location = 'base.docx'
doc = Document(base_doc_location)
my_style = doc.styles['Normal']

# Define your desired fonts: one for complex-script (CTL) text and one for
# western text.
user_cs_font_size = 16
user_cs_font_name = 'FreeFarsi'
user_en_font_size = 12
user_en_font_name = 'FreeMono'

# ==================================================
# Get the <w:rPr> element of this style. get_or_add_* creates the element
# when the style does not have one yet, so none of the lookups below can fail
# on None (this probably isn't necessary for a document with an altered
# style, but it is the safe option).
rpr = my_style.element.get_or_add_rPr()
rFonts = rpr.get_or_add_rFonts()
rpr.get_or_add_sz()

# ==================================================
# Namespace prefix for the "w:" elements and attributes. In the xml they look
# like <w:rPr>, <w:rFonts> or w:val; the "w" maps to a url such as
# http://schemas.openxmlformats.org/... and the full tag name is written as
# {url}rPr. w_nsmap holds that "{url}" part.
w_nsmap = '{' + rpr.nsmap['w'] + '}'

# ==================================================
# Look up the existing <w:szCs> and <w:lang> children; create and attach them
# only when the style does not have them, so existing elements keep their
# position inside <w:rPr>.
szCs = rpr.find(w_nsmap + 'szCs')
if szCs is None:
    szCs = rpr.makeelement(w_nsmap + 'szCs', nsmap=rpr.nsmap)
    rpr.append(szCs)

lang = rpr.find(w_nsmap + 'lang')
if lang is None:
    lang = rpr.makeelement(w_nsmap + 'lang', nsmap=rpr.nsmap)
    rpr.append(lang)

# ==================================================
# sz and szCs values are strings holding 2 times the font size, so for a font
# size of 11 you have to set sz (western fonts) or szCs (CTL fonts) to "22".
szCs.set(w_nsmap + 'val', str(int(user_cs_font_size * 2)))

# Now change the cs font and the bidi language values.
rFonts.set(w_nsmap + 'cs', user_cs_font_name)
lang.set(w_nsmap + 'bidi', 'fa-IR')  # For Persian

# ==================================================
# Because the default style was changed, there is no need to pass a style
# every time a new paragraph is added. Changing the font name or size the
# normal way does not touch the cs values, so you can have one font for the
# CTL language and a different font for the western language.
persian_p = doc.add_paragraph('نوشته')

en_font = my_style.font
en_font.name = user_en_font_name
en_font.size = Pt(user_en_font_size)
english_p = doc.add_paragraph('some text')

doc.save('ex.docx')
Edit (code improvement):
I commented out the lines that could use some improvement and put the better lines underneath them.
# Each commented-out line below is the old version from the script above;
# the line(s) right after it are the suggested replacement.

#rpr = my_style.element.rPr # If None it'll throw errors later
rpr = my_style.element.get_or_add_rPr() # creates <w:rPr> when the style has none
#if rpr.rFonts is None:
# rpr._add_rFonts()
rFonts = rpr.get_or_add_rFonts()
#if rpr.sz is None:
# rpr._add_sz()
rpr.get_or_add_sz()
# By importing these you can make elements and set values more quickly.
from docx.oxml.shared import OxmlElement, qn
#szCs = rpr.makeelement(w_nsmap+'szCs',nsmap=rpr.nsmap)
szCs = OxmlElement('w:szCs')
#lang = rpr.makeelement(w_nsmap+'lang',nsmap =rpr.nsmap)
lang = OxmlElement('w:lang')
# NOTE(review): szCs and lang are not attached to rpr in this fragment; the
# rpr.append(...) calls from the full script are presumably still needed - confirm.
#szCs_attrib = szCs.attrib
#lang_attrib = lang.attrib
#rFonts_atr = rpr.rFonts.attrib
#szCs_attrib[w_nsmap+'val'] =str(int(user_cs_font_size*2))
#rFonts_atr[w_nsmap+'cs'] = user_cs_font_name
#lang_attrib[w_nsmap+'bidi'] = 'fa-IR'
# qn('w:...') builds the namespaced attribute name, replacing the manual
# w_nsmap string concatenation used before.
szCs.set(qn('w:val'),str(int(user_cs_font_size*2)))
lang.set(qn('w:bidi'),'fa-IR')
rFonts.set(qn('w:cs'),user_cs_font_name)
Upvotes: 5