Mako 8.2.0 API
MakoCore SDK API Documentation
Loading...
Searching...
No Matches
text.h
Go to the documentation of this file.
1/*
2 * Copyright (C) 2018-2025 Global Graphics Software Ltd. All rights reserved.
3 */
4
5#ifndef JAWSMAKO_TEXT_H
6#define JAWSMAKO_TEXT_H
7
15
16#include <jawsmako/types.h>
18#include <edl/idomglyphs.h>
19
20namespace JawsMako
21{
22 using namespace EDL;
23
24 class IUnicodeHelper;
26
32
// eLanguageType: language types. The enum's declaration line and its
// enumerator are not shown in this listing; the page index names one
// enumerator, eLTArabic (Arabic language type).
33 {
35 };
36
// IUnicodeHelper: an interface into language specific unicode helpers.
// NOTE(review): the class declaration line itself is not shown in this listing.
43 {
44 private:
// Private comparison callback taking two untyped pointers.
// NOTE(review): the (const void*, const void*) -> int shape looks like a
// qsort/bsearch-style comparator — confirm against the implementation.
48 static int compare(const void *a, const void *b);
49
50 public:
51 virtual ~IUnicodeHelper() {}
52
// Creates a unicode helper for the specified language.
58 static JAWSMAKO_API IUnicodeHelperPtr create(eLanguageType language);
59
// Returns true if ch is a unicode space character.
65 static JAWSMAKO_API bool isSpaceCharacter(wchar_t ch);
66
// Returns true if wchar is within the language's combining character ranges.
72 virtual bool isCombiningCharacter(const wchar_t wchar) const = 0;
73
// Performs a reverse mapping on a contextual character and returns the original.
79 virtual wchar_t getNonContextualCharacter(const wchar_t ch) const = 0;
80
// Returns true if the language has contextual forms.
85 virtual bool hasContextualForms() const = 0;
86
// NOTE(review): the page index also lists
//   virtual String constructStringFromRuns(CTextRunVect &runs) const = 0
// (creates a string from a CTextRunVect by applying diacritic fixes and
// contextual substitution); its declaration appears to be elided at this point.
95
// Checks and attempts to fix diacritic characters that are incorrectly ordered.
100 virtual void applyDiacriticFixes(CTextRunVect &runs) const = 0;
101
// Applies the language's contextual substitution rules, in place.
106 virtual void applyContextualSubstitution(String &input) const = 0;
107 };
108
// ITextRun: a run of text, containing unicode information, the position,
// transformation and bounds of the text.
115 class ITextRun : public IRCObject
116 {
117 public:
118 virtual ~ITextRun() {}
119
// Get the Unicode string for the run as available.
124 virtual const String &getUnicode() const = 0;
125
// Get the Unicode string for the run as available, in UTF-8.
130 virtual U8String getUTF8() const = 0;
131
// Get the transformation active where the glyph run is positioned,
// relative to the page.
138 virtual FMatrix getTransform() const = 0;
139
// Get the origin of the first character, in glyph-local coordinates.
144 virtual FPoint getLocalOrigin() const = 0;
145
// Convenience: get the origin of the first character, in page coordinates.
// NOTE(review): the signature line (listed in the page index as
// "FPoint getOriginOnPage() const") is elided from this listing.
152 {
// Start from the glyph-local origin and map it through the run's transform;
// the same variable is passed as both result and input of transform().
153 FPoint origin = getLocalOrigin();
154 getTransform().transform(origin, origin);
155 return origin;
156 }
157
// Get the bounds of the run, in glyph-local coordinates.
164 virtual FRect getLocalBounds(bool tight = true) const = 0;
165
// Convenience: get the bounds of the run, in page coordinates.
173 virtual FRect getBoundsOnPage(bool tight = true) const
174 {
// Take the glyph-local bounds (forwarding 'tight') and transform the
// rectangle in place with the run's transform.
175 FRect bounds = getLocalBounds(tight);
176 getTransform().transformRect(bounds);
177 return bounds;
178 }
179
// Get the corner points of the run (in the clockwise order), in page
// coordinates. The four corners are written to p1..p4.
189 virtual void getCornersOnPage(FPoint &p1, FPoint &p2, FPoint &p3, FPoint &p4, bool tight) const = 0;
190
// Split the run into the smallest possible units, returning the split runs.
// NOTE(review): behaviour when the run cannot be split is truncated in the
// page index — consult the full documentation.
197 virtual CTextRunVect split() = 0;
198
// Determine the width of a space in the font used for this run.
// NOTE(review): the fallback when the font has no space character is
// truncated in the page index.
207 virtual double getSpaceWidth() const = 0;
208
// Determine the width of a space in the font used for this run.
// NOTE(review): presumably expressed in page units, going by the name — confirm.
217 virtual double getSpaceWidthOnPage() const = 0;
218
// Determine the height of the font used for this run.
225 virtual double getFontHeightOnPage() const = 0;
226
// Get the IDOMGlyphs node that represents this run.
231 virtual IDOMGlyphsPtr getGlyphs() const = 0;
232 };
233
234 class IPageLayout;
236
248
258
259 class IPageLayoutData;
261
262 class IPageLayoutNode;
264
267
// IPageLayoutData: provides a representation of the analyzed page layout by
// organizing and allowing access to collections of layout nodes.
// NOTE(review): the class declaration line is not shown in this listing, and
// the index description above is truncated — confirm against the full docs.
274 {
275 public:
276 virtual ~IPageLayoutData() {}
277
// Creation function for IPageLayoutData, built from a collection of
// IPageLayoutNodes.
284 static JAWSMAKO_API IPageLayoutDataPtr create(IPageLayoutNodeCollection data);
285
// Get a collection of IPageLayoutNodes, representing the content of the
// specified column number.
292 virtual IPageLayoutNodeCollection getColumn(uint32 columnNumber) = 0;
293
// Get the number of columns (this includes the root item).
298 virtual uint32 getNumberOfColumns() const = 0;
299 };
300
// IPageLayoutNode: simple data type representing a part of an analyzed page.
// NOTE(review): the class declaration line is not shown in this listing.
307 {
308 public:
309 virtual ~IPageLayoutNode() {}
310
// Creation function for an IPageLayoutNode, a simple data type representing
// a part of an analyzed page.
320 static JAWSMAKO_API IPageLayoutNodePtr create(FRect rect, ePageLayoutType type, String content, const IDOMGlyphsPtr &contentGlyphs, uint32 columnNumber);
321
// Get the unicode string content of the node if it's of type ePLTTextRun;
// otherwise returns an empty string.
327 virtual String getContent() const = 0;
328
// Get the IDOMGlyphs node that represents the content of the node, if it's
// of type ePLTTextRun.
// NOTE(review): the result for other node types is truncated in the page index.
334 virtual IDOMGlyphsPtr getContentGlyphs() const = 0;
335
// Gets the page bounds of the node.
340 virtual FRect getPageBounds() const = 0;
341
// Get the type of node (ePLTRoot: whole page, ePLTColumn: column,
// ePLTTextRun: run of text).
346 virtual ePageLayoutType getType() const = 0;
347
// Get the column number that the node belongs to. Zero is the root element.
352 virtual uint32 getColumnNumber() const = 0;
353 };
354
// IPageLayout: analyzes the layout of a FixedPage, grouping together text
// deemed to be in horizontal and/or vertical runs.
// NOTE(review): the index description is truncated — confirm the full wording.
361 class IPageLayout : public IRCObject
362 {
363 public:
364 virtual ~IPageLayout() {}
365
// Creation function for an IPageLayout, a fixed page layout analyser, from a
// fixed page. Throws an IEDLError on failure.
373 static JAWSMAKO_API IPageLayoutPtr create(IEDLClassFactory *factory, const IDOMFixedPagePtr &page);
374
// Creation function for an IPageLayout, a fixed page layout analyser, from a
// vector of text runs. Throws an IEDLError on failure.
382 static JAWSMAKO_API IPageLayoutPtr create(IEDLClassFactory *factory, const CTextRunVect &runs);
383
// Get the FixedPage being processed.
388 virtual IDOMFixedPagePtr getFixedPage() const = 0;
389
// Process the page to find the blocks of text. Each analysis phase can
// optionally be performed independently; the default, ePAAll, completes all
// page analysis steps (build, join and reorder text runs) in one operation.
396 virtual void analyze(ePageAnalysis analysisToPerform = ePAAll) = 0;
397
// Get a textual description of the page content, useful for debugging purposes.
402 virtual String getLayoutInfo() const = 0;
403
// Get a processed representation of the page content.
408 virtual IPageLayoutDataPtr getLayoutData() const = 0;
409
// NOTE(review): the page index also lists
//   virtual IPageLayoutNodeCollection getLayoutNodeCollection() const = 0
// (get a flat collection of page content nodes); its declaration appears to
// be elided at this point.
415
// Return all page text.
420 virtual String getPageText() const = 0;
421
// Set the virtual space threshold.
429 virtual void setVirtualSpaceThreshold(double virtualSpaceThreshold) = 0;
430
// Set the multiple space mode for inserting virtual spaces.
440 virtual void setMultipleSpaceMode(bool multipleSpaces) = 0;
441
// Set the threshold for spacing between successive text lines when
// concatenating them into a text run.
452 virtual void setLineSpacingThreshold(double threshold) = 0;
453 };
454
455 class ITextSearch;
457
// ITextSearch: perform text searching using the page information obtained
// from an IPageLayout.
463 class ITextSearch : public IRCObject
464 {
465 public:
466 virtual ~ITextSearch() {}
467
// Creation function for ITextSearch, bound to the given page layout.
475 static JAWSMAKO_API ITextSearchPtr create(IEDLClassFactory *factory, const IPageLayoutPtr &pageLayout);
476
// Return a collection of quadpoint data covering all found text.
// This overload reports matches through a vector of EDLString ('found').
// NOTE(review): the meaning of each CFPointVectVect entry is truncated in
// the page index — consult the full documentation.
486 virtual CFPointVectVect search(const String &targetText, CEDLStringVect &found, bool caseSensitive, bool ignoreSpaces) const = 0;
487
// Return a collection of quadpoint data covering all found text.
// This overload reports matches through a vector of EDLSysString ('found').
497 virtual CFPointVectVect search(const String &targetText, CEDLSysStringVect &found, bool caseSensitive, bool ignoreSpaces) const = 0;
498 };
499
500 class ITextSelect;
502
// ITextSelect: perform text selection using the page information obtained
// from an IPageLayout.
508 class ITextSelect : public IRCObject
509 {
510 public:
511 virtual ~ITextSelect() {}
512
// Creation function for an ITextSelect, bound to the given page layout.
520 static JAWSMAKO_API ITextSelectPtr create(IEDLClassFactory *factory, const IPageLayoutPtr &pageLayout);
521
// Return the unicode string located within the bounds of 'pageArea', using
// the specified 'language' (defaults to eLTArabic).
// NOTE(review): how 'language' is applied is truncated in the page index.
529 virtual String getTextAtRect(const FRect &pageArea, eLanguageType language = eLTArabic) const = 0;
530
// Return quadpoint data covering all text within the specified page area,
// optionally returning the text through 'selectedText'.
// NOTE(review): presumably 'selectedText' may be null to skip the text — confirm.
538 virtual CFPointVect selectArea(const FRect &pageArea, String *selectedText) const = 0;
539
// Return quadpoint data covering all text within the specified page
// start/end points.
// NOTE(review): the role of 'selectedText' is truncated in the page index;
// presumably it mirrors selectArea — confirm.
548 virtual CFPointVect selectLines(const FPoint &startPoint, const FPoint &endPoint, String *selectedText) const = 0;
549 };
550}
551
552#endif
Definition edlvector.h:30
void transform(PointTmpl< TItem > &result, const PointTmpl< TItem > &point, bool ignoreDXDY=false) const
Transform a point.
Definition edlgeom.h:725
void transformRect(RectTmpl< TItem > &rect, bool ignoreDXDY=false) const
Transform a rectangle.
Definition edlgeom.h:894
EDL Factory Interface allows one part of the EDL infrastructure to register class creation methods id...
Definition iedlfactory.h:31
Base class Interface for all Reference Counted objects.
Definition ircobject.h:35
Provides a representation of the analyzed page layout by organizing and allowing access to collection...
Definition text.h:274
virtual ~IPageLayoutData()
Definition text.h:276
virtual IPageLayoutNodeCollection getColumn(uint32 columnNumber)=0
Get a collection of IPageLayoutNodes, representing the content of the specified column number....
static JAWSMAKO_API IPageLayoutDataPtr create(IPageLayoutNodeCollection data)
Creation function for IPageLayoutData that provides a representation of the analyzed page layout,...
virtual uint32 getNumberOfColumns() const =0
Get the number of columns (this includes the root item)
Analyze the layout of a FixedPage, grouping together text deemed to be in horizontal and/or vertical ...
Definition text.h:362
static JAWSMAKO_API IPageLayoutPtr create(IEDLClassFactory *factory, const IDOMFixedPagePtr &page)
Creation function for an IPageLayout, a fixed page layout analyser. Throws an IEDLError on failure.
virtual IPageLayoutNodeCollection getLayoutNodeCollection() const =0
Get a flat collection of page content nodes.
virtual String getPageText() const =0
Return all page text.
virtual void analyze(ePageAnalysis analysisToPerform=ePAAll)=0
Process the page to find the blocks of text. Can optionally perform each analysis phase independently (w...
virtual IDOMFixedPagePtr getFixedPage() const =0
Get the FixedPage being processed.
virtual String getLayoutInfo() const =0
Get a textual description of the page content, useful for debugging purposes.
virtual void setLineSpacingThreshold(double threshold)=0
Set the threshold for spacing between successive text lines when concatenating them into a text run.
virtual void setVirtualSpaceThreshold(double virtualSpaceThreshold)=0
Set the virtual space threshold.
static JAWSMAKO_API IPageLayoutPtr create(IEDLClassFactory *factory, const CTextRunVect &runs)
Creation function for an IPageLayout, a fixed page layout analyser. Throws an IEDLError on failure.
virtual ~IPageLayout()
Definition text.h:364
virtual IPageLayoutDataPtr getLayoutData() const =0
Get a processed representation of the page content.
virtual void setMultipleSpaceMode(bool multipleSpaces)=0
Set the multiple space mode for inserting virtual spaces.
Simple data type representing a part of an analyzed page.
Definition text.h:307
virtual FRect getPageBounds() const =0
Gets the page bounds of the node.
virtual ~IPageLayoutNode()
Definition text.h:309
virtual String getContent() const =0
Get the unicode string content of the node, if it's of type ePLTTextRun otherwise returns an empty st...
virtual ePageLayoutType getType() const =0
Get the type of node.
virtual IDOMGlyphsPtr getContentGlyphs() const =0
Get the IDOMGlyphsNode that represents the content of the node, if it's of type ePLTTextRun otherwise...
static JAWSMAKO_API IPageLayoutNodePtr create(FRect rect, ePageLayoutType type, String content, const IDOMGlyphsPtr &contentGlyphs, uint32 columnNumber)
Creation function for an IPageLayoutNode, a simple data type representing a part of an analyzed page.
virtual uint32 getColumnNumber() const =0
Get the column number that the node belongs to. Zero is the root element.
A run of text, containing unicode information, the position, transformation and bounds of the text.
Definition text.h:116
virtual double getSpaceWidthOnPage() const =0
Determine the width of a space in the font used for this run. If the font does not have a space chara...
virtual FPoint getLocalOrigin() const =0
Get the origin of the first character, in glyph-local coordinates.
virtual FRect getLocalBounds(bool tight=true) const =0
Get the bounds of the run, in glyph-local coordinates.
virtual U8String getUTF8() const =0
Get the Unicode string for the run as available, in UTF-8.
virtual double getFontHeightOnPage() const =0
Determine the height of the font used for this run.
virtual FMatrix getTransform() const =0
Get the transformation active where the glyph run is positioned, relative to the page....
FPoint getOriginOnPage() const
Get the origin of the first character, in page coordinates. Convenience.
Definition text.h:151
virtual FRect getBoundsOnPage(bool tight=true) const
Get the bounds of the run, in page coordinates. Convenience.
Definition text.h:173
virtual ~ITextRun()
Definition text.h:118
virtual CTextRunVect split()=0
Split the run into the smallest possible units, returning the split runs. If the run cannot be split,...
virtual double getSpaceWidth() const =0
Determine the width of a space in the font used for this run. If the font does not have a space chara...
virtual IDOMGlyphsPtr getGlyphs() const =0
Get the IDOMGlyphs node that represents this run.
virtual void getCornersOnPage(FPoint &p1, FPoint &p2, FPoint &p3, FPoint &p4, bool tight) const =0
Get the corner points of the run (in the clockwise order), in page coordinates.
virtual const String & getUnicode() const =0
Get the Unicode string for the run as available.
Perform text searching using the page information obtained from an IPageLayout.
Definition text.h:464
virtual CFPointVectVect search(const String &targetText, CEDLStringVect &found, bool caseSensitive, bool ignoreSpaces) const =0
Return a collection of quadpoint data covering all found text. Each entry in the CFPointVectVect repr...
virtual ~ITextSearch()
Definition text.h:466
static JAWSMAKO_API ITextSearchPtr create(IEDLClassFactory *factory, const IPageLayoutPtr &pageLayout)
Creation function for ITextSearch that performs text searching using page information obtained from a...
virtual CFPointVectVect search(const String &targetText, CEDLSysStringVect &found, bool caseSensitive, bool ignoreSpaces) const =0
Return a collection of quadpoint data covering all found text. Each entry in the CFPointVectVect repr...
Perform text selection using the page information obtained from an IPageLayout.
Definition text.h:509
virtual CFPointVect selectLines(const FPoint &startPoint, const FPoint &endPoint, String *selectedText) const =0
Return quadpoint data covering all text within the specified page start/end points,...
virtual CFPointVect selectArea(const FRect &pageArea, String *selectedText) const =0
Return quadpoint data covering all text within the specified page area, optionally returning the actu...
virtual ~ITextSelect()
Definition text.h:511
virtual String getTextAtRect(const FRect &pageArea, eLanguageType language=eLTArabic) const =0
Return the unicode string located within the bounds of 'pageArea', using the specified 'language' uni...
static JAWSMAKO_API ITextSelectPtr create(IEDLClassFactory *factory, const IPageLayoutPtr &pageLayout)
Creation function for an ITextSelect that performs text selection using page information obtained fro...
An interface into language specific unicode helpers.
Definition text.h:43
static JAWSMAKO_API IUnicodeHelperPtr create(eLanguageType language)
Creates a unicode helper for the specified language.
virtual wchar_t getNonContextualCharacter(const wchar_t ch) const =0
Performs a reverse mapping on a contextual character and returns the original.
virtual bool isCombiningCharacter(const wchar_t wchar) const =0
Returns true if wchar is within the language's combining character ranges.
virtual void applyContextualSubstitution(String &input) const =0
Applies the language's contextual substitution rules, in place.
virtual ~IUnicodeHelper()
Definition text.h:51
virtual bool hasContextualForms() const =0
Returns true if the language has contextual forms.
static JAWSMAKO_API bool isSpaceCharacter(wchar_t ch)
Returns true if ch is a unicode space character.
virtual String constructStringFromRuns(CTextRunVect &runs) const =0
Creates a string from a CTextRunVect by applying diacritic fixes and contextual substitution,...
virtual void applyDiacriticFixes(CTextRunVect &runs) const =0
Checks and attempts to fix diacritic characters that are incorrectly ordered.
PointTmpl< double > FPoint
Definition edlgeom.h:102
CTransformMatrix< double > FMatrix
Definition edlgeom.h:1208
RectTmpl< double > FRect
Definition edlgeom.h:338
CEDLVector< FPoint > CFPointVect
Definition edlgeom.h:106
CEDLVector< EDLString > CEDLStringVect
Definition edlstring.h:173
CEDLVector< EDLSysString > CEDLSysStringVect
Definition edlstring.h:174
unsigned int uint32
Definition edltypes.h:34
eLanguageType
Language types.
Definition text.h:33
@ eLTArabic
Arabic language type.
Definition text.h:34
EDLSysString U8String
A UTF-8 String.
Definition types.h:144
EDLString String
A wide character string (UTF-16 on Windows, UTF-32 on all other platforms)
Definition types.h:138
JawsMako interactive features.
Definition apexcustompostprocess.h:17
ePageAnalysis
IPageLayout analysis options.
Definition text.h:241
@ ePADefineReadingOrder
Sort the runs of text into a suitable reading order.
Definition text.h:246
@ ePAAll
Complete all page analysis steps (build, join and reorder text runs) in one operation.
Definition text.h:242
@ ePAJoinVRuns
Find all compatible vertical runs of text.
Definition text.h:245
@ ePABuildPage
Locate all glyphs nodes on the page.
Definition text.h:243
@ ePAJoinHRuns
Find all compatible horizontal runs of text.
Definition text.h:244
ePageLayoutType
IPageLayoutNode types.
Definition text.h:253
@ ePLTColumn
Column.
Definition text.h:255
@ ePLTTextRun
Run of text.
Definition text.h:256
@ ePLTRoot
Whole page.
Definition text.h:254
CEDLVector< CFPointVect > CFPointVectVect
Definition interactive.h:1253
CEDLVector< IPageLayoutNodePtr > IPageLayoutNodeCollection
Definition text.h:266
CEDLVector< ITextRunPtr > CTextRunVect
Definition types.h:171
#define DECL_SMART_PTR(cls)
Definition smartptr.h:211
#define JAWSMAKO_API
Definition types.h:29