Reputation: 11
I have a Syncfusion React PDF viewer and am performing OCR using Azure Cognitive Service.
When the user searches in the PDF file, the Azure services will return the bounds. Using these bounds, I need to highlight the text in the PDF file. I have shared below the response I get from the Azure Cognitive Services.
How can I highlight the text in my PDF file using these bounds?
{
"docs": [
{
"id": null,
"fileName": "sample.pdf",
"pages": [
{
"id": 0,
"box": "0 0 1224 1584",
"words": [
{
"id": 42,
"text": "patient",
"box": "366 156 403 169"
},
{
"id": 1258,
"text": "Patient",
"box": "35 1373 83 1386"
}
]
},
{
"id": 1,
"box": "0 0 1224 1584",
"words": [
{
"id": 11,
"text": "Patient:",
"box": "98 124 179 146"
}
]
}
]
}
]
}
Upvotes: 0
Views: 376
Reputation: 11
I am able to set the bounds in the PDF viewer; the code snippet is given below. The value 1.5 is calculated using the following formula:
page ratio = Azure cognitive page size width or height / PDF Viewer page size width or height
From the above I get a page ratio of 1.5, and I then divide the x and y values by that ratio.
// OCR word box; treated here as "x1 y1 x2 y2" (two opposite corners) in
// OCR-page units, which is how the original snippet indexed it.
const word = '694 170 836 212';
// page ratio = OCR page size / PDF Viewer page size (1.5 for this document).
const pageRatio = 1.5;
// Scale all four coordinates, not just x and y: mixing scaled and unscaled
// values is what produced a wrong width/height before.
const [x1, y1, x2, y2] = word
  .split(' ')
  .map((value) => parseFloat(value) / pageRatio);
const xVal = x1;
const yVal = y1;
const widthVal = x2 - x1;
const heightVal = y2 - y1;
pdfViewer.annotation.addAnnotation("Highlight", {
  // Use the computed size instead of a hard-coded 90x25 rectangle so the
  // highlight fits whichever word is passed in.
  bounds: [{ x: xVal, y: yVal, width: widthVal, height: heightVal }],
  pageNumber: 0,
});
Upvotes: 0
Reputation: 3533
The sample JavaScript code below works on JSON data representing the pages and words you want to highlight.
The `highlightPDFFromJSON` function highlights words in the PDF based on the provided JSON data. A JSON object (`jsonData`) is provided, containing information about the words to be highlighted on each page.
Call the sample function below after your own code has produced the OCR response.
const { PDFDocument, rgb } = require('pdf-lib');
const fs = require('fs').promises;
const axios = require('axios');
/**
 * Fetch a PDF over HTTP and store it on disk.
 *
 * @param {string} url - Address the PDF is requested from.
 * @param {string} outputPath - File path the downloaded bytes are written to.
 * @returns {Promise<void>} Settles once the file has been written.
 */
async function downloadPDF(url, outputPath) {
  // Ask for raw bytes so the body is not decoded as text.
  const requestOptions = { responseType: 'arraybuffer' };
  const { data: pdfBytes } = await axios.get(url, requestOptions);
  const fileContents = Buffer.from(pdfBytes);
  await fs.writeFile(outputPath, fileContents);
}
/**
 * Parse a space-separated "x1 y1 x2 y2" box string into four numbers.
 *
 * @param {string} boxText - Box string such as "366 156 403 169".
 * @returns {number[]|null} The four numbers, or null when the text does not
 *   contain exactly four finite numbers.
 */
function parseOcrBox(boxText) {
  if (typeof boxText !== 'string') {
    return null;
  }
  const values = boxText.trim().split(/\s+/).map(Number);
  return values.length === 4 && values.every(Number.isFinite) ? values : null;
}

/**
 * Convert an OCR word box into a rectangle in PDF page coordinates.
 *
 * Assumes OCR boxes are measured from the top-left corner with Y growing
 * downward (TODO confirm against the OCR service); pdf-lib draws from the
 * bottom-left corner with Y growing upward, so Y is flipped here.
 *
 * @param {number[]} wordBox - [x1, y1, x2, y2] of the word in OCR units.
 * @param {number[]} ocrPageBox - [x1, y1, x2, y2] of the whole page in OCR units.
 * @param {{width: number, height: number}} pdfSize - Page size in PDF units.
 * @returns {{x: number, y: number, width: number, height: number}}
 */
function ocrBoxToPdfRect(wordBox, ocrPageBox, pdfSize) {
  const ocrWidth = ocrPageBox[2] - ocrPageBox[0];
  const ocrHeight = ocrPageBox[3] - ocrPageBox[1];
  // A degenerate page box cannot be used for scaling; fall back to 1:1.
  const scaleX = ocrWidth > 0 ? pdfSize.width / ocrWidth : 1;
  const scaleY = ocrHeight > 0 ? pdfSize.height / ocrHeight : 1;
  return {
    x: (wordBox[0] - ocrPageBox[0]) * scaleX,
    // Anchor at the word's bottom edge, measured up from the page bottom.
    y: pdfSize.height - (wordBox[3] - ocrPageBox[1]) * scaleY,
    width: (wordBox[2] - wordBox[0]) * scaleX,
    height: (wordBox[3] - wordBox[1]) * scaleY,
  };
}

/**
 * Draw a translucent yellow rectangle over every word listed in `jsonData`
 * and write the result to a new PDF file.
 *
 * Word boxes are scaled from the OCR page box (`pageData.box`) to the actual
 * PDF page size and flipped vertically before drawing. Pages whose `id` is
 * not present in the PDF and words with a malformed box are skipped with a
 * warning. Any other failure is logged with console.error and not rethrown.
 *
 * @param {string} inputPath - Path of the PDF to read.
 * @param {string} outputPath - Path the highlighted PDF is written to.
 * @param {object} jsonData - OCR result; `jsonData.docs[0].pages` is iterated,
 *   each page providing `id`, `box` and `words[].box`.
 * @returns {Promise<void>}
 */
async function highlightPDFFromJSON(inputPath, outputPath, jsonData) {
  try {
    const pdfBytes = await fs.readFile(inputPath);
    const pdfDoc = await PDFDocument.load(pdfBytes);
    // Fetch the page list once instead of on every loop iteration.
    const pdfPages = pdfDoc.getPages();

    for (const pageData of jsonData.docs[0].pages) {
      const page = pdfPages[pageData.id];
      if (!page) {
        console.warn(`Skipping page id ${pageData.id}: not present in the PDF`);
        continue;
      }

      const pdfSize = page.getSize();
      // Without a usable OCR page box, treat OCR units as PDF units.
      const ocrPageBox =
        parseOcrBox(pageData.box) || [0, 0, pdfSize.width, pdfSize.height];

      for (const word of pageData.words || []) {
        const wordBox = parseOcrBox(word.box);
        if (!wordBox) {
          console.warn(`Skipping word id ${word.id}: invalid box "${word.box}"`);
          continue;
        }

        page.drawRectangle({
          ...ocrBoxToPdfRect(wordBox, ocrPageBox, pdfSize),
          color: rgb(1, 1, 0), // Yellow color for highlight
          opacity: 0.5, // 50% opacity
          borderColor: rgb(0, 0, 0), // Black border
          borderWidth: 0.5, // Border width
        });
      }
    }

    const modifiedPdfBytes = await pdfDoc.save();
    await fs.writeFile(outputPath, modifiedPdfBytes);
    console.log('PDF with highlight annotations saved successfully!');
  } catch (error) {
    // Best-effort contract kept from the original: report, do not rethrow.
    console.error('Error:', error);
  }
}
// Example usage
// Source address of the PDF to download.
const url = ' ';
// Where the downloaded PDF is stored before highlighting.
const inputPath = 'Document1.pdf';
// Where the highlighted copy is written.
const outputPath = 'output.pdf';

// Sample OCR result: one document with two pages and their word boxes.
const jsonData = {
  "docs": [
    {
      "id": null,
      "fileName": "sample.pdf",
      "pages": [
        {
          "id": 0,
          "box": "0 0 1224 1584",
          "words": [
            { "id": 42, "text": "patient", "box": "366 156 403 169" },
            { "id": 1258, "text": "Patient", "box": "35 1373 83 1386" },
          ],
        },
        {
          "id": 1,
          "box": "0 0 1224 1584",
          "words": [
            { "id": 11, "text": "Patient:", "box": "98 124 179 146" },
          ],
        },
      ],
    },
  ],
};

// Download first, then highlight; any rejection along the way is logged.
(async () => {
  try {
    await downloadPDF(url, inputPath);
    await highlightPDFFromJSON(inputPath, outputPath, jsonData);
  } catch (error) {
    console.error('Error downloading PDF:', error);
  }
})();
Output:
Upvotes: 0