Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support RTL/bidi text, some font and harfbuzz fixes #309

Merged
merged 11 commits into from
Sep 15, 2019
2 changes: 2 additions & 0 deletions crengine/include/fb2def.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ XS_TAG1I( span )
XS_TAG1I( strong )
XS_TAG1I( sub )
XS_TAG1I( sup )
XS_TAG1I( bdi )
XS_TAG1I( bdo )

// EPUB3 elements (in ns_epub - otherwise set to inline like any unknown element)
XS_TAG1I( switch ) // <epub:switch>
Expand Down
6 changes: 3 additions & 3 deletions crengine/include/hyphman.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
class HyphMethod
{
public:
virtual bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth ) = 0;
virtual bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize=1 ) = 0;
virtual ~HyphMethod() { }
};

Expand Down Expand Up @@ -123,9 +123,9 @@ class HyphMan
HyphMan();
~HyphMan();

inline static bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth )
inline static bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize=1 )
{
return _method->hyphenate( str, len, widths, flags, hyphCharWidth, maxWidth );
return _method->hyphenate( str, len, widths, flags, hyphCharWidth, maxWidth, flagSize );
}
};

Expand Down
46 changes: 36 additions & 10 deletions crengine/include/lvfnt.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,16 +215,42 @@ lUInt16 lvfontMeasureText( const lvfont_handle pfont,
lChar16 def_char
);

#define LCHAR_IS_SPACE 1 ///< flag: this char is one of unicode space chars
#define LCHAR_ALLOW_WRAP_AFTER 2 ///< flag: line break after this char is allowed
#define LCHAR_DEPRECATED_WRAP_AFTER 4 ///< flag: line break after this char is possible but deprecated
#define LCHAR_ALLOW_HYPH_WRAP_AFTER 8 ///< flag: line break after this char is allowed with addition of hyphen
#define LCHAR_IS_LIGATURE_TAIL 16 ///< flag: this char is a tail of a ligature (ligature is carried by first char)
#define LCHAR_IS_OBJECT 32 ///< flag: this char is object or image
#define LCHAR_MANDATORY_NEWLINE 64 ///< flag: this char must start with new line
#define LCHAR_IS_COLLAPSED_SPACE 128 ///< flag: this char is a space that should not be displayed
// LCHAR_IS_EOL was not used by any code, and has been replaced by LCHAR_IS_LIGATURE_TAIL
// #define LCHAR_IS_EOL 16 ///< flag: this char is CR or LF
// These lower than 0x0100 (that fit in a lUint8) may be set by lvfntman's measureText()
// (to possibly get some informative flags back from harfbuzz) and hyphman's hyphenate().
// (These should be changed or dropped with care, as they may be used by some other parts of CoolReader)
#define LCHAR_IS_SPACE 0x0001 ///< flag: this char is one of the unicode space chars.
// It is set only on the normal space and the normal non-breakable
// space (spaces that can have their widths expanded or shrunk).
// It is not set on the unicode fixed width spaces.
#define LCHAR_ALLOW_WRAP_AFTER 0x0002 ///< flag: line break after this char is allowed.
// It is set on all spaces, except non-breakable ones.
// It is set on soft-hyphen.
// It is not set on CJK chars.
#define LCHAR_DEPRECATED_WRAP_AFTER 0x0004 ///< flag: line break after this char is possible but deprecated
// It is set on '-' and other unicode hyphens.
#define LCHAR_ALLOW_HYPH_WRAP_AFTER 0x0008 ///< flag: line break after this char is allowed with addition of hyphen
// It is set by Hyphman when finding hyphenation points in a word.
#define LCHAR_MANDATORY_NEWLINE 0x0010 ///< flag: this char must start with new line
#define LCHAR_IS_CLUSTER_TAIL 0x0020 ///< flag: this char is a tail of a cluster (eg. ligature,
// whose glyph is carried by first char)
// It is set by harfbuzz when used.

/// The next ones, not fitting in a lUInt8, should only be set and used by lvtextfm
#define LCHAR_IS_OBJECT 0x0100 ///< flag: this char is object (image, float)
#define LCHAR_IS_COLLAPSED_SPACE 0x0200 ///< flag: this char is a space that should not be rendered
#define LCHAR_IS_TO_IGNORE 0x0400 ///< flag: this char is to be ignored/skipped in text measurement and drawing
#define LCHAR_IS_RTL 0x0800 ///< flag: this char is part of a RTL segment

// (Next ones are not yet used and can be removed/changed)
#define LCHAR_IS_CJK_NOT_PUNCT 0x1000 ///< flag: this char is a CJK char but not a punctuation
#define LCHAR_IS_CJK_LEFT_PUNCT 0x2000 ///< flag: this char is a CJK left punctuation
#define LCHAR_IS_CJK_RIGHT_PUNCT 0x4000 ///< flag: this char is a CJK right punctuation

// The two values below are masks combining the single-bit flags above; they are
// meant for testing a flags value, not for being set as a whole.
#define LCHAR_IS_CJK_PUNCT 0x6000 ///< mask (for checking): LEFT_PUNCT | RIGHT_PUNCT, this char is a CJK punctuation (neutral if set)
#define LCHAR_IS_CJK 0x7000 ///< mask (for checking): NOT_PUNCT | LEFT_PUNCT | RIGHT_PUNCT, this char is a CJK char

// LCHAR_IS_EOL was not used by any code, and has been replaced by LCHAR_IS_CLUSTER_TAIL
// #define LCHAR_IS_EOL 0x0010 ///< flag: this char is CR or LF

/** \brief returns true if character is unicode space
\param code is character
Expand Down
63 changes: 43 additions & 20 deletions crengine/include/lvfntman.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,21 @@ enum kerning_mode_t {
};


// Hint flags for measuring and drawing (some are used only with full Harfbuzz).
// These 4 are obtained from their LTEXT_WORD_* equivalents (see lvtextfm.h) by
// masking with LTEXT_WORD_DIRECTION_PARA_MASK and shifting right by
// LTEXT_WORD_DIRECTION_PARA_TO_LFNT_SHIFT (see WORD_FLAGS_TO_FNT_FLAGS).
// Keep both sets in sync.
#define LFNT_HINT_DIRECTION_KNOWN 0x0001 /// segment direction is known
#define LFNT_HINT_DIRECTION_IS_RTL 0x0002 /// segment direction is RTL
#define LFNT_HINT_BEGINS_PARAGRAPH 0x0004 /// segment is at start of paragraph
#define LFNT_HINT_ENDS_PARAGRAPH 0x0008 /// segment is at end of paragraph

// These 4 translate from their LTEXT_TD_* equivalents (see lvtextfm.h). Keep them in sync.
#define LFNT_DRAW_UNDERLINE 0x0100 /// underlined text
#define LFNT_DRAW_OVERLINE 0x0200 /// overlined text
#define LFNT_DRAW_LINE_THROUGH 0x0400 /// struck-through text
#define LFNT_DRAW_BLINK 0x0800 /// blinking text (implemented as underline)
// Mask covering the 4 LFNT_DRAW_* decoration bits above
#define LFNT_DRAW_DECORATION_MASK 0x0F00

/** \brief base class for fonts

implements single interface for font of any engine
Expand Down Expand Up @@ -215,6 +230,7 @@ class LVFont : public LVRefCounter
\param max_width is maximum width to measure line
\param def_char is character to replace absent glyphs in font
\param letter_spacing is number of pixels to add between letters
\param hints: hint flags (direction, begin/end of paragraph, for Harfbuzz - unrelated to font hinting)
\return number of characters before max_width reached
*/
virtual lUInt16 measureText(
Expand All @@ -224,28 +240,30 @@ class LVFont : public LVRefCounter
int max_width,
lChar16 def_char,
int letter_spacing=0,
bool allow_hyphenation=true
bool allow_hyphenation=true,
lUInt32 hints=0
) = 0;

/** \brief measure text
\param text is text string pointer
\param len is number of characters to measure
\return width of specified string
*/
virtual lUInt32 getTextWidth(
const lChar16 * text, int len
) = 0;

// /** \brief get glyph image in 1 byte per pixel format
// \param code is unicode character
// \param buf is buffer [width*height] to place glyph data
// \return true if glyph was found
// */
// virtual bool getGlyphImage(lUInt16 code, lUInt8 * buf, lChar16 def_char=0) = 0;
virtual lUInt32 getTextWidth( const lChar16 * text, int len ) = 0;

// /** \brief get glyph image in 1 byte per pixel format
// \param code is unicode character
// \param buf is buffer [width*height] to place glyph data
// \return true if glyph was found
// */
// virtual bool getGlyphImage(lUInt16 code, lUInt8 * buf, lChar16 def_char=0) = 0;

/** \brief get glyph item
\param code is unicode character
\return glyph pointer if glyph was found, NULL otherwise
*/
virtual LVFontGlyphCacheItem * getGlyph(lUInt32 ch, lChar16 def_char=0) = 0;

/// returns font baseline offset
virtual int getBaseline() = 0;
/// returns font height including normal interline space
Expand All @@ -268,8 +286,8 @@ class LVFont : public LVRefCounter
virtual lString8 getTypeFace() const = 0;
/// returns font family id
virtual css_font_family_t getFontFamily() const = 0;
/// draws text string
virtual void DrawTextString( LVDrawBuf * buf, int x, int y,
/// draws text string (returns x advance)
virtual int DrawTextString( LVDrawBuf * buf, int x, int y,
const lChar16 * text, int len,
lChar16 def_char, lUInt32 * palette = NULL, bool addHyphen = false,
lUInt32 flags=0, int letter_spacing=0, int width=-1,
Expand Down Expand Up @@ -374,6 +392,8 @@ class LVFontManager
virtual lString8 GetFallbackFontFace() { return lString8::empty_str; }
/// returns fallback font for specified size
virtual LVFontRef GetFallbackFont(int /*size*/) { return LVFontRef(); }
/// returns fallback font for specified size, weight and italic
// This base implementation returns an empty LVFontRef.
// 'weight' deliberately has no default value: if every trailing parameter
// were defaulted, a one-argument call GetFallbackFont(size) would be
// ambiguous with the GetFallbackFont(int) overload declared alongside it.
// Calls passing (size, weight) or (size, weight, italic) are unaffected.
virtual LVFontRef GetFallbackFont(int size, int weight, bool italic=false ) { return LVFontRef(); }
/// registers font by name
virtual bool RegisterFont( lString8 name ) = 0;
/// registers font by name and face
Expand Down Expand Up @@ -445,8 +465,8 @@ class LVBaseFont : public LVFont
virtual lString8 getTypeFace() const { return _typeface; }
/// returns font family id
virtual css_font_family_t getFontFamily() const { return _family; }
/// draws text string
virtual void DrawTextString( LVDrawBuf * buf, int x, int y,
/// draws text string (returns x advance)
virtual int DrawTextString( LVDrawBuf * buf, int x, int y,
const lChar16 * text, int len,
lChar16 def_char, lUInt32 * palette, bool addHyphen,
lUInt32 flags=0, int letter_spacing=0, int width=-1,
Expand All @@ -469,7 +489,8 @@ class LBitmapFont : public LVBaseFont
int max_width,
lChar16 def_char,
int letter_spacing=0,
bool allow_hyphenation=true
bool allow_hyphenation=true,
lUInt32 hints=0
);
/** \brief measure text
\param text is text string pointer
Expand Down Expand Up @@ -630,7 +651,8 @@ class LVWin32DrawFont : public LVBaseWin32Font
int max_width,
lChar16 def_char,
int letter_spacing=0,
bool allow_hyphenation=true
bool allow_hyphenation=true,
lUInt32 hints=0
);
/** \brief measure text
\param text is text string pointer
Expand All @@ -644,8 +666,8 @@ class LVWin32DrawFont : public LVBaseWin32Font
/// returns char width
virtual int getCharWidth( lChar16 ch, lChar16 def_char=0 );

/// draws text string
virtual void DrawTextString( LVDrawBuf * buf, int x, int y,
/// draws text string (returns x advance)
virtual int DrawTextString( LVDrawBuf * buf, int x, int y,
const lChar16 * text, int len,
lChar16 def_char, lUInt32 * palette, bool addHyphen,
lUInt32 flags=0, int letter_spacing=0, int width=-1,
Expand Down Expand Up @@ -807,7 +829,8 @@ class LVWin32Font : public LVBaseWin32Font
int max_width,
lChar16 def_char,
int letter_spacing=0,
bool allow_hyphenation=true
bool allow_hyphenation=true,
lUInt32 hints=0
);
/** \brief measure text
\param text is text string pointer
Expand Down
21 changes: 17 additions & 4 deletions crengine/include/lvpagesplitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,11 +289,20 @@ class LVRendLineInfo {
{
clear();
}
void addLink( LVFootNote * note )
int getLinksCount()
{
// Number of entries in the links list; 0 while no list has been allocated
return links != NULL ? links->length() : 0;
}
void addLink( LVFootNote * note, int pos=-1 )
{
if ( links==NULL )
links = new LVFootNoteList();
links->add( note );
if ( pos >= 0 ) // insert at pos
links->insert( pos, note );
else // append
links->add( note );
flags |= RN_SPLIT_FOOT_LINK;
}
};
Expand Down Expand Up @@ -371,8 +380,12 @@ class LVRendPageContext
}
bool updateRenderProgress( int numFinalBlocksRendered );

/// append footnote link to last added line
void addLink( lString16 id );
/// Get the number of links in the current line links list, or
// in link_ids when no page_list
int getCurrentLinksCount();

/// append or insert footnote link to last added line
void addLink( lString16 id, int pos=-1 );

/// get gathered links when no page_list
// (returns a reference to avoid lString16Collection destructor from
Expand Down
1 change: 1 addition & 0 deletions crengine/include/lvstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,7 @@ class lString16Collection
for (int i=0; i<v.length(); i++)
add( v[i] );
}
int insert( int pos, const lString16 & str );
void erase(int offset, int count);
/// split into several lines by delimiter
void split(const lString16 & str, const lString16 & delimiter);
Expand Down
38 changes: 23 additions & 15 deletions crengine/include/lvtextfm.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ extern "C" {
#define LTEXT_TD_LINE_THROUGH 0x0400 /**< \brief striked through text */
#define LTEXT_TD_BLINK 0x0800 /**< \brief blinking text */
#define LTEXT_TD_MASK 0x0F00 /**< \brief text decoration mask */
// These 4 above translate to LFNT_DRAW_* equivalents (see lvfntman.h). Keep them in sync.

#define LTEXT_SRC_IS_OBJECT 0x8000 /**< \brief object (image) */
#define LTEXT_IS_LINK 0x4000 /**< \brief link */
Expand Down Expand Up @@ -111,7 +112,7 @@ typedef struct
lUInt16 min_width; /**< \brief index of source text line */
lInt16 x; /**< \brief word x position in line */
lInt16 y; /**< \brief baseline y position */
lUInt8 flags; /**< \brief flags */
lUInt16 flags; /**< \brief flags */
union {
/// for text word
struct {
Expand All @@ -127,24 +128,31 @@ typedef struct
} formatted_word_t;

// formatted_word_t flags
/// can add space after this word
#define LTEXT_WORD_CAN_ADD_SPACE_AFTER 1
/// can break line after this word
#define LTEXT_WORD_CAN_BREAK_LINE_AFTER 2
/// can break with hyphenation after this word
#define LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER 4
/// must break line after this word
#define LTEXT_WORD_MUST_BREAK_LINE_AFTER 8
/// object flag
#define LTEXT_WORD_IS_OBJECT 0x80
/// first word of link flag
#define LTEXT_WORD_IS_LINK_START 0x40
#define LTEXT_WORD_CAN_ADD_SPACE_AFTER 0x0001 /// can add space after this word
#define LTEXT_WORD_CAN_BREAK_LINE_AFTER 0x0002 /// can break line after this word (not used anywhere)
#define LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER 0x0004 /// can break with hyphenation after this word
#define LTEXT_WORD_MUST_BREAK_LINE_AFTER 0x0008 /// must break line after this word (not used anywhere)

#define LTEXT_WORD_IS_LINK_START 0x0010 /// first word of link flag
#define LTEXT_WORD_IS_OBJECT 0x0020 /// object flag

#define LTEXT_WORD_DIRECTION_KNOWN 0x0100 /// word has been thru bidi: if next flag is unset, it is LTR.
#define LTEXT_WORD_DIRECTION_IS_RTL 0x0200 /// word is RTL
#define LTEXT_WORD_BEGINS_PARAGRAPH 0x0400 /// word is the first word of a paragraph
#define LTEXT_WORD_ENDS_PARAGRAPH 0x0800 /// word is the last word of a paragraph
// These 4 translate (after mask & shift) to LFNT_HINT_* equivalents
// (see lvfntman.h). Keep them in sync.
// Mask selecting the word direction/paragraph flags (bits 0x0100..0x0800),
// and the right shift that brings them down to bits 0x0001..0x0008.
#define LTEXT_WORD_DIRECTION_PARA_MASK 0x0F00
#define LTEXT_WORD_DIRECTION_PARA_TO_LFNT_SHIFT 8
// Extracts the direction/paragraph bits from word flags 'f' and shifts them
// down by LTEXT_WORD_DIRECTION_PARA_TO_LFNT_SHIFT.
// The argument is parenthesized so that an expression argument (e.g. "a | b")
// is masked as a whole: '&' binds tighter than '|', so without the
// parentheses only the last operand would be masked.
#define WORD_FLAGS_TO_FNT_FLAGS(f) ( ((f) & LTEXT_WORD_DIRECTION_PARA_MASK) >> LTEXT_WORD_DIRECTION_PARA_TO_LFNT_SHIFT )

//#define LTEXT_BACKGROUND_MARK_FLAGS 0xFFFF0000l

// formatted_line_t flags
#define LTEXT_LINE_SPLIT_AVOID_BEFORE 1
#define LTEXT_LINE_SPLIT_AVOID_AFTER 2
#define LTEXT_LINE_SPLIT_AVOID_BEFORE 0x01
#define LTEXT_LINE_SPLIT_AVOID_AFTER 0x02
#define LTEXT_LINE_IS_BIDI 0x04
#define LTEXT_LINE_PARA_IS_RTL 0x08

/** \brief Text formatter formatted line
*/
Expand Down
10 changes: 9 additions & 1 deletion crengine/include/lvtinydom.h
Original file line number Diff line number Diff line change
Expand Up @@ -2456,6 +2456,10 @@ class ldomDocumentFragmentWriter : public LVXMLParserCallback
lString8 headStyleText;
int headStyleState;

lString16 htmlDir;
lString16 htmlLang;
bool insideHtmlTag;

public:

/// return content of html/head/style element
Expand Down Expand Up @@ -2486,6 +2490,9 @@ class ldomDocumentFragmentWriter : public LVXMLParserCallback
insideTag = false;
headStyleText.clear();
headStyleState = 0;
insideHtmlTag = false;
htmlDir.clear();
htmlLang.clear();
}
/// called on parsing end
virtual void OnStop()
Expand Down Expand Up @@ -2525,7 +2532,8 @@ class ldomDocumentFragmentWriter : public LVXMLParserCallback
/// constructor
ldomDocumentFragmentWriter( LVXMLParserCallback * parentWriter, lString16 baseTagName, lString16 baseTagReplacementName, lString16 fragmentFilePath )
: parent(parentWriter), baseTag(baseTagName), baseTagReplacement(baseTagReplacementName),
insideTag(false), styleDetectionState(0), pathSubstitutions(100), baseElement(NULL), lastBaseElement(NULL), headStyleState(0)
insideTag(false), styleDetectionState(0), pathSubstitutions(100), baseElement(NULL), lastBaseElement(NULL),
headStyleState(0), insideHtmlTag(false)
{
setCodeBase( fragmentFilePath );
}
Expand Down
Loading