Reputation: 125
I want to write Persian text to a PDF with PDFBox; however, PDFBox separates the characters and renders them in reverse order. How should I fix that? (I use pdfbox-2.0.19.)
here is my code:
// Writes one line of Persian text to the vertical middle of an A4 page and
// saves the result as a PDF.
// try-with-resources closes the content stream and the document even when a
// call throws; the document was previously never closed at all.
try (PDDocument doc = new PDDocument()) {
    PDPage page = new PDPage(PDRectangle.A4);
    doc.addPage(page);
    PDType0Font font = PDType0Font.load(doc, new File("C:\\Users\\farhad\\Downloads\\vazir-font-v24.1.0\\Vazir.ttf"));
    int fontSize = 15;
    String text = "برنامه نویس ایرانی هست اینو بلد باشه؟";
    float x = 50;
    float y = page.getMediaBox().getHeight() / 2;
    // The content stream has to be closed before the document is saved,
    // hence the nested block.
    try (PDPageContentStream cont = new PDPageContentStream(doc, page)) {
        cont.beginText();
        cont.setFont(font, fontSize);
        cont.newLineAtOffset(x, y);
        // NOTE(review): the string is handed to showText unmodified; this is
        // the call whose output appears with separated, reversed letters.
        cont.showText(text);
        cont.endText();
    }
    doc.save("D:\\pdf.pdf");
}
I have also attached a screenshot which shows the result.
Upvotes: 1
Views: 1030
Reputation: 125
You need to do some manipulation of the Unicode characters. Every Persian character that you normally know (for example س ش ت ظ) actually has up to 4 different forms, and each form has its own Unicode character.
Take the word سا as an example:
in this case, the initial س has a Unicode character that is different from the س in راس, which comes at the end of the word.
For a better understanding, look at the picture below.
Just go to this website https://www.compart.com/en/unicode/ and search for your character.
Please note that Arabic and Persian use almost the same script; that's why, in the picture that I uploaded, it says "Arabic letter Seen isolated form" for the س character.
Here is a class that you can use to calculate 4 different forms of Persian characters:
/**
 * Looks up the Unicode presentation forms (initial, medial, final and isolated)
 * of a single Persian letter.
 *
 * <p>Usage: call {@link #setCharc(char)} with a letter, then read the four forms
 * through the getters. Each getter returns a one-character string.
 *
 * <ul>
 *   <li>A form that the letter does not have (for example the initial and medial
 *       forms of ALEF, which never joins to the following letter) is reported as
 *       the one-character string holding U+0000.</li>
 *   <li>For a character that is not in the table, and before the first call to
 *       {@code setCharc}, every getter returns {@code null}.</li>
 * </ul>
 *
 * <p>Instances are mutable: the getters always reflect the most recent
 * {@code setCharc} call.
 */
public class PersianCharachtersUnicode {

    /** Marker returned for a positional form that the current letter does not have. */
    private static final String NO_FORM = "\0";

    /** The character most recently passed to {@link #setCharc(char)}. */
    char c;

    private String initialForm;
    private String medialForm;
    private String finalForm;
    private String isolatedForm;

    /**
     * Selects the letter whose presentation forms the getters will report.
     *
     * @param c the letter to look up, given as its ordinary (non-presentation) code point
     */
    public void setCharc(char c) {
        this.c = c;
        calculate();
    }

    /** Recomputes the four form fields from the current value of {@code c}. */
    private void calculate() {
        // Start from a clean slate so that a character missing from the table
        // never reports the forms of the previously looked-up letter.
        setForms(null, null, null, null);

        // Argument order of setForms: initial, medial, final, isolated.
        switch (c) {
            case '\u0622': // ALEF WITH MADDA ABOVE (final form is U+FE82)
                setForms(NO_FORM, NO_FORM, "\uFE82", "\uFE81");
                break;
            case '\u0627': // ALEF
                setForms(NO_FORM, NO_FORM, "\uFE8E", "\uFE8D");
                break;
            case '\u0628': // BEH
                setForms("\uFE91", "\uFE92", "\uFE90", "\uFE8F");
                break;
            case '\u067E': // PEH
                setForms("\uFB58", "\uFB59", "\uFB57", "\uFB56");
                break;
            case '\u062A': // TEH
                setForms("\uFE97", "\uFE98", "\uFE96", "\uFE95");
                break;
            case '\u062B': // THEH
                setForms("\uFE9B", "\uFE9C", "\uFE9A", "\uFE99");
                break;
            case '\u062C': // JEEM
                setForms("\uFE9F", "\uFEA0", "\uFE9E", "\uFE9D");
                break;
            case '\u0686': // TCHEH (final form is U+FB7B, not THEH's U+FE9B)
                setForms("\uFB7C", "\uFB7D", "\uFB7B", "\uFB7A");
                break;
            case '\u062D': // HAH
                setForms("\uFEA3", "\uFEA4", "\uFEA2", "\uFEA1");
                break;
            case '\u062E': // KHAH
                setForms("\uFEA7", "\uFEA8", "\uFEA6", "\uFEA5");
                break;
            case '\u062F': // DAL
                setForms(NO_FORM, NO_FORM, "\uFEAA", "\uFEA9");
                break;
            case '\u0630': // THAL
                setForms(NO_FORM, NO_FORM, "\uFEAC", "\uFEAB");
                break;
            case '\u0631': // REH
                setForms(NO_FORM, NO_FORM, "\uFEAE", "\uFEAD");
                break;
            case '\u0632': // ZAIN
                setForms(NO_FORM, NO_FORM, "\uFEB0", "\uFEAF");
                break;
            case '\u0698': // JEH
                setForms(NO_FORM, NO_FORM, "\uFB8B", "\uFB8A");
                break;
            case '\u0633': // SEEN
                setForms("\uFEB3", "\uFEB4", "\uFEB2", "\uFEB1");
                break;
            case '\u0634': // SHEEN
                setForms("\uFEB7", "\uFEB8", "\uFEB6", "\uFEB5");
                break;
            case '\u0635': // SAD
                setForms("\uFEBB", "\uFEBC", "\uFEBA", "\uFEB9");
                break;
            case '\u0636': // DAD
                setForms("\uFEBF", "\uFEC0", "\uFEBE", "\uFEBD");
                break;
            case '\u0637': // TAH
                setForms("\uFEC3", "\uFEC4", "\uFEC2", "\uFEC1");
                break;
            case '\u0638': // ZAH
                setForms("\uFEC7", "\uFEC8", "\uFEC6", "\uFEC5");
                break;
            case '\u0639': // AIN
                setForms("\uFECB", "\uFECC", "\uFECA", "\uFEC9");
                break;
            case '\u063A': // GHAIN
                setForms("\uFECF", "\uFED0", "\uFECE", "\uFECD");
                break;
            case '\u0641': // FEH
                setForms("\uFED3", "\uFED4", "\uFED2", "\uFED1");
                break;
            case '\u0642': // QAF
                setForms("\uFED7", "\uFED8", "\uFED6", "\uFED5");
                break;
            case '\u06A9': // KEHEH (Persian kaf)
                setForms("\uFB90", "\uFB91", "\uFB8F", "\uFB8E");
                break;
            case '\u06AF': // GAF
                setForms("\uFB94", "\uFB95", "\uFB93", "\uFB92");
                break;
            case '\u0644': // LAM
                setForms("\uFEDF", "\uFEE0", "\uFEDE", "\uFEDD");
                break;
            case '\u0645': // MEEM
                setForms("\uFEE3", "\uFEE4", "\uFEE2", "\uFEE1");
                break;
            case '\u0646': // NOON
                setForms("\uFEE7", "\uFEE8", "\uFEE6", "\uFEE5");
                break;
            case '\u0648': // WAW
                setForms(NO_FORM, NO_FORM, "\uFEEE", "\uFEED");
                break;
            case '\u0647': // HEH
                setForms("\uFEEB", "\uFEEC", "\uFEEA", "\uFEE9");
                break;
            case '\u06CC': // FARSI YEH
                setForms("\uFBFE", "\uFBFF", "\uFBFD", "\uFBFC");
                break;
            default:
                // Not a letter from the table: all four forms stay null.
                break;
        }
    }

    /** Stores the four presentation forms in one step. */
    private void setForms(String initial, String medial, String fin, String isolated) {
        initialForm = initial;
        medialForm = medial;
        finalForm = fin;
        isolatedForm = isolated;
    }

    /**
     * @return the initial form of the current letter, the U+0000 marker string if
     *         the letter has no initial form, or {@code null} if the character is
     *         not in the table (the misspelt method name is kept for compatibility)
     */
    public String getInitialFom_Unicode() {
        return initialForm;
    }

    /**
     * @return the final form of the current letter, the U+0000 marker string if
     *         the letter has no final form, or {@code null} if the character is
     *         not in the table
     */
    public String getFinalForm_Unicode() {
        return finalForm;
    }

    /**
     * @return the isolated form of the current letter, or {@code null} if the
     *         character is not in the table
     */
    public String getIsolatedForm_Unicode() {
        return isolatedForm;
    }

    /**
     * @return the medial form of the current letter, the U+0000 marker string if
     *         the letter has no medial form, or {@code null} if the character is
     *         not in the table
     */
    public String getMedialForm_Unicode() {
        return medialForm;
    }
}
Upvotes: 3