Reputation: 167
I'm working with iText7 to read the text of a PDF file and create an XML file based on the location of that text in the PDF. My problem is as follows.
There is a class LocationTextExtractionStrategy
in iText7 assembly which is as shown below
// Abridged, declarations-only view of the iText7 class (no method bodies are shown here).
public class LocationTextExtractionStrategy : ITextExtractionStrategy, IEventListener
{
// Constructor that receives an ITextChunkLocationStrategy instance.
public LocationTextExtractionStrategy(ITextChunkLocationStrategy strat);
// Virtual event callback; derived classes may override it.
public virtual void EventOccurred(IEventData data, EventType type);
// Nested interface: a factory for ITextChunkLocation objects.
public interface ITextChunkLocationStrategy
{
// Creates the location object for the given render info and baseline segment.
ITextChunkLocation CreateLocation(TextRenderInfo renderInfo, LineSegment baseline);
}
}
Now I inherited this class into my project class as shown below
/// <summary>
/// Extraction strategy that records one <c>UTextChunk</c> per rendered character,
/// skipping characters whose text contains a space.
/// </summary>
public class TextLocationExtractionStrategy : LocationTextExtractionStrategy
{
    /// <summary>Factory used to build the location of every recorded character.</summary>
    private readonly ITextChunkLocationStrategy locationstrategy;

    /// <summary>Creates the strategy and shares <paramref name="strategy"/> with the base class.</summary>
    /// <param name="strategy">Location factory; must not be null.</param>
    /// <exception cref="System.ArgumentNullException">
    /// Thrown when <paramref name="strategy"/> is null, instead of failing later with a
    /// NullReferenceException inside <see cref="EventOccurred"/>.
    /// </exception>
    public TextLocationExtractionStrategy(ITextChunkLocationStrategy strategy)
        : base(strategy)
    {
        if (strategy == null)
        {
            throw new System.ArgumentNullException(nameof(strategy));
        }

        locationstrategy = strategy;
    }

    /// <summary>
    /// Handles text render events: splits the rendered text into per-character infos and
    /// stores a chunk for each character that contains no space. Other event types are ignored.
    /// </summary>
    /// <param name="data">Event payload; cast to <c>TextRenderInfo</c> for text render events.</param>
    /// <param name="type">Type of the event being reported.</param>
    public override void EventOccurred(IEventData data, EventType type)
    {
        if (!type.Equals(EventType.RENDER_TEXT))
        {
            return;
        }

        TextRenderInfo renderInfo = (TextRenderInfo)data;
        foreach (TextRenderInfo t in renderInfo.GetCharacterRenderInfos())
        {
            string letter = t.GetText();

            // A single " " also contains a space, so this one test covers both original conditions.
            if (letter.IndexOf(' ') >= 0)
            {
                continue;
            }

            ITextChunkLocation loc = locationstrategy.CreateLocation(t, t.GetBaseline());

            // NOTE(review): m_locationResult is not declared in the visible snippet;
            // presumably a collection field defined elsewhere in this class - confirm.
            m_locationResult.Add(new UTextChunk(t, letter, loc));
        }
    }
}
In the EventOccurred
method, I have called the CreateLocation
method of the base class (inside the if
condition).
Now my problem is, how can I pass an interface as a parameter to the Constructor of my derived class, so that I can easily call CreateLocation
method of the base class?
I'm trying it as shown below, but I don't understand how I can create a suitable object to pass to the constructor so that I can call the CreateLocation
method.
// NOTE: null is passed here only because no ITextChunkLocationStrategy implementation is
// available yet; this is what leads to the "object reference not set" error described below.
LocationTextExtractionStrategy.ITextChunkLocationStrategy locst = null;
TextLocationExtractionStrategy strategy = new TextLocationExtractionStrategy(locst);
PdfTextExtractor.GetTextFromPage(page, strategy);
As we can't create an instance of an interface, I don't know how to pass a value for a parameter whose type is an interface. In the lines above I just assigned null to it and called the constructor, but with a null value the code fails with "object reference not set to an instance of an object".
Can you please help me resolve this issue?
Please let me know or correct me if there is any issue in the way of presenting my question.
Upvotes: 1
Views: 1940
Reputation: 95918
If you look at the source of your base class LocationTextExtractionStrategy
(it's open source, so look at the source!), you'll see that it does not only have the constructor with an ITextChunkLocationStrategy
argument, it has a constructor without argument, too.
That constructor actually instantiates an implementation of that interface and forwards that to the same constructor you refer to:
// Parameterless constructor: creates a new _ITextChunkLocationStrategy_85 and forwards it
// to the constructor overload of this class that accepts a location strategy.
public LocationTextExtractionStrategy()
: this(new _ITextChunkLocationStrategy_85()) {
}
/// <summary>
/// Location strategy that builds a <c>TextChunkLocationDefaultImp</c> from the end points
/// of the baseline and the single-space width reported by the render info.
/// </summary>
private sealed class _ITextChunkLocationStrategy_85 : LocationTextExtractionStrategy.ITextChunkLocationStrategy {
    public _ITextChunkLocationStrategy_85() {
    }

    /// <param name="renderInfo">render info supplying the single-space width</param>
    /// <param name="baseline">segment whose start and end points delimit the chunk</param>
    /// <returns>a new location spanning the baseline</returns>
    public ITextChunkLocation CreateLocation(TextRenderInfo renderInfo, LineSegment baseline) {
        var chunkStart = baseline.GetStartPoint();
        var chunkEnd = baseline.GetEndPoint();
        var spaceWidth = renderInfo.GetSingleSpaceWidth();
        return new TextChunkLocationDefaultImp(chunkStart, chunkEnd, spaceWidth);
    }
}
As you want to use the ITextChunkLocationStrategy
implementation and the base class does not provide a getter for it, you cannot simply use that other constructor without arguments. And you cannot instantiate that _ITextChunkLocationStrategy_85
class either as it's private. And you cannot simply copy that _ITextChunkLocationStrategy_85
into your code because TextChunkLocationDefaultImp
is internal
. <sigh>
What you can do, though, is copying TextChunkLocationDefaultImp
into your code, then copying _ITextChunkLocationStrategy_85
into your code, replacing the use of TextChunkLocationDefaultImp
from the base class by a use of your copy of that class, and then instantiate your copy of the _ITextChunkLocationStrategy_85
class to eventually get an ITextChunkLocationStrategy
implementation instance.
Alternatively you can try and work with reflection and introspection. This might result in maintenance issues, though.
It's a real nuisance if a library provides means to generalize something and then hides the default implementation thereof...
For reference, TextChunkLocationDefaultImp
currently is implemented as follows
/// <summary>
/// ITextChunkLocation implementation that derives orientation, perpendicular distance and
/// parallel start/end distances from a chunk's start and end points.
/// </summary>
internal class TextChunkLocationDefaultImp : ITextChunkLocation {
/// <summary>
/// largest perpendicular-distance difference tolerated by <c>SameLine</c> and <c>ContainsMark</c>
/// </summary>
private const float DIACRITICAL_MARKS_ALLOWED_VERTICAL_DEVIATION = 2;
/// <summary>the starting location of the chunk</summary>
private readonly Vector startLocation;
/// <summary>the ending location of the chunk</summary>
private readonly Vector endLocation;
/// <summary>unit vector in the orientation of the chunk</summary>
private readonly Vector orientationVector;
/// <summary>the orientation as a scalar for quick sorting</summary>
private readonly int orientationMagnitude;
/// <summary>
/// perpendicular distance to the orientation unit vector (i.e. the Y position in an unrotated coordinate system)
/// </summary>
/// <remarks>
/// stored as an int to handle the fuzziness of comparing floats; note that the constructor
/// converts with an (int) cast, which truncates toward zero rather than rounding to nearest
/// </remarks>
private readonly int distPerpendicular;
/// <summary>
/// distance of the start of the chunk parallel to the orientation unit vector
/// (i.e. the X position in an unrotated coordinate system)
/// </summary>
private readonly float distParallelStart;
/// <summary>
/// distance of the end of the chunk parallel to the orientation unit vector
/// (i.e. the X position in an unrotated coordinate system)
/// </summary>
private readonly float distParallelEnd;
/// <summary>the width of a single space character in the font of the chunk</summary>
private readonly float charSpaceWidth;
/// <summary>Stores the given values and precomputes the orientation and distance fields.</summary>
/// <param name="startLocation">the starting location of the chunk</param>
/// <param name="endLocation">the ending location of the chunk</param>
/// <param name="charSpaceWidth">the width of a single space character in the font of the chunk</param>
public TextChunkLocationDefaultImp(Vector startLocation, Vector endLocation, float charSpaceWidth) {
this.startLocation = startLocation;
this.endLocation = endLocation;
this.charSpaceWidth = charSpaceWidth;
Vector oVector = endLocation.Subtract(startLocation);
// a zero-length chunk has no direction of its own, so (1, 0, 0) is used as its orientation
if (oVector.Length() == 0) {
oVector = new Vector(1, 0, 0);
}
orientationVector = oVector.Normalize();
// Atan2 of the I2 and I1 components, scaled by 1000 and cast to int
orientationMagnitude = (int)(Math.Atan2(orientationVector.Get(Vector.I2), orientationVector.Get(Vector.I1)
) * 1000);
// see http://mathworld.wolfram.com/Point-LineDistance2-Dimensional.html
// the two vectors we are crossing are in the same plane, so the result will be purely
// in the z-axis (out of plane) direction, so we just take the I3 component of the result
Vector origin = new Vector(0, 0, 1);
distPerpendicular = (int)(startLocation.Subtract(origin)).Cross(orientationVector).Get(Vector.I3);
distParallelStart = orientationVector.Dot(startLocation);
distParallelEnd = orientationVector.Dot(endLocation);
}
/// <returns>the orientation scalar computed in the constructor</returns>
public virtual int OrientationMagnitude() {
return orientationMagnitude;
}
/// <returns>the integer perpendicular distance computed in the constructor</returns>
public virtual int DistPerpendicular() {
return distPerpendicular;
}
/// <returns>the dot product of the orientation vector and the start location</returns>
public virtual float DistParallelStart() {
return distParallelStart;
}
/// <returns>the dot product of the orientation vector and the end location</returns>
public virtual float DistParallelEnd() {
return distParallelEnd;
}
/// <returns>the start location of the text</returns>
public virtual Vector GetStartLocation() {
return startLocation;
}
/// <returns>the end location of the text</returns>
public virtual Vector GetEndLocation() {
return endLocation;
}
/// <returns>the width of a single space character as rendered by this chunk</returns>
public virtual float GetCharSpaceWidth() {
return charSpaceWidth;
}
/// <summary>Checks whether this location and another one lie on the same line.</summary>
/// <remarks>
/// Locations with different orientation magnitudes are never on the same line. Equal
/// perpendicular distances always are. Otherwise the result is true only when the difference
/// is within the allowed diacritical-mark deviation and at least one of the two segments
/// has zero length.
/// </remarks>
/// <param name="as">the location to compare to</param>
/// <returns>true if this location is on the same line as the other</returns>
public virtual bool SameLine(ITextChunkLocation @as) {
if (OrientationMagnitude() != @as.OrientationMagnitude()) {
return false;
}
float distPerpendicularDiff = DistPerpendicular() - @as.DistPerpendicular();
if (distPerpendicularDiff == 0) {
return true;
}
LineSegment mySegment = new LineSegment(startLocation, endLocation);
LineSegment otherSegment = new LineSegment(@as.GetStartLocation(), @as.GetEndLocation());
return Math.Abs(distPerpendicularDiff) <= DIACRITICAL_MARKS_ALLOWED_VERTICAL_DEVIATION && (mySegment.GetLength
() == 0 || otherSegment.GetLength() == 0);
}
/// <summary>
/// Computes the distance between the end of 'other' and the beginning of this chunk
/// in the direction of this chunk's orientation vector.
/// </summary>
/// <remarks>
/// Note that it's a bad idea to call this for chunks that aren't on the same line and
/// orientation, but we don't explicitly check for that condition for performance reasons.
/// </remarks>
/// <param name="other">the location whose parallel end distance is subtracted</param>
/// <returns>
/// this chunk's parallel start distance minus the parallel end distance of 'other'
/// (negative when this chunk starts before 'other' ends)
/// </returns>
public virtual float DistanceFromEndOf(ITextChunkLocation other) {
return DistParallelStart() - other.DistParallelEnd();
}
/// <summary>Decides whether there is a word boundary between 'previous' and this chunk.</summary>
/// <param name="previous">the location to measure the gap against</param>
/// <returns>
/// true if the gap between the two chunks is larger than half of this chunk's space width;
/// false if either chunk has zero length or the chunks intersect
/// </returns>
public virtual bool IsAtWordBoundary(ITextChunkLocation previous) {
// In case a text chunk is of zero length, this probably means this is a mark character,
// and we do not actually want to insert a space in such case
if (startLocation.Equals(endLocation) || previous.GetEndLocation().Equals(previous.GetStartLocation())) {
return false;
}
float dist = DistanceFromEndOf(previous);
if (dist < 0) {
// negative gap: measure in the opposite direction before concluding anything
dist = previous.DistanceFromEndOf(this);
//The situation when the chunks intersect. We don't need to add space in this case
if (dist < 0) {
return false;
}
}
return dist > GetCharSpaceWidth() / 2.0f;
}
/// <summary>Checks whether a mark location falls within the extent of a base location.</summary>
/// <param name="baseLocation">the location that may contain the mark</param>
/// <param name="markLocation">the location of the candidate mark</param>
/// <returns>
/// true if the mark's start and end I1 coordinates lie within those of the base location and
/// the perpendicular distances differ by at most the allowed diacritical-mark deviation
/// </returns>
internal static bool ContainsMark(ITextChunkLocation baseLocation, ITextChunkLocation markLocation) {
return baseLocation.GetStartLocation().Get(Vector.I1) <= markLocation.GetStartLocation().Get(Vector.I1) &&
baseLocation.GetEndLocation().Get(Vector.I1) >= markLocation.GetEndLocation().Get(Vector.I1) && Math.
Abs(baseLocation.DistPerpendicular() - markLocation.DistPerpendicular()) <= DIACRITICAL_MARKS_ALLOWED_VERTICAL_DEVIATION;
}
}
Upvotes: 1