/* Pango * pango-break.h: * * Copyright (C) 1999 Red Hat Software * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ #ifndef __PANGO_BREAK_H__ #define __PANGO_BREAK_H__ #include G_BEGIN_DECLS #include /* Logical attributes of a character. */ /** * PangoLogAttr: * @is_line_break: if set, can break line in front of character * @is_mandatory_break: if set, must break line in front of character * @is_char_break: if set, can break here when doing character wrapping * @is_white: is whitespace character * @is_cursor_position: if set, cursor can appear in front of character. * i.e. this is a grapheme boundary, or the first character * in the text. * This flag implements Unicode's * Grapheme * Cluster Boundaries semantics. * @is_word_start: is first character in a word * @is_word_end: is first non-word char after a word * Note that in degenerate cases, you could have both @is_word_start * and @is_word_end set for some character. * @is_sentence_boundary: is a sentence boundary. * There are two ways to divide sentences. The first assigns all * inter-sentence whitespace/control/format chars to some sentence, * so all chars are in some sentence; @is_sentence_boundary denotes * the boundaries there. The second way doesn't assign * between-sentence spaces, etc. to any sentence, so * @is_sentence_start/@is_sentence_end mark the boundaries of those sentences. * @is_sentence_start: is first character in a sentence * @is_sentence_end: is first char after a sentence. * Note that in degenerate cases, you could have both @is_sentence_start * and @is_sentence_end set for some character. (e.g. no space after a * period, so the next sentence starts right away) * @backspace_deletes_character: if set, backspace deletes one character * rather than the entire grapheme cluster. This * field is only meaningful on grapheme * boundaries (where @is_cursor_position is * set). In some languages, the full grapheme * (e.g. letter + diacritics) is considered a * unit, while in others, each decomposed * character in the grapheme is a unit. In the * default implementation of pango_break(), this * bit is set on all grapheme boundaries except * those following Latin, Cyrillic or Greek base characters. * @is_expandable_space: is a whitespace character that can possibly be * expanded for justification purposes. (Since: 1.18) * @is_word_boundary: is a word boundary. * More specifically, means that this is not a position in the middle * of a word. For example, both sides of a punctuation mark are * considered word boundaries. This flag is particularly useful when * selecting text word-by-word. * This flag implements Unicode's * Word * Boundaries semantics. (Since: 1.22) * * The #PangoLogAttr structure stores information * about the attributes of a single character. */ struct _PangoLogAttr { guint is_line_break : 1; /* Can break line in front of character */ guint is_mandatory_break : 1; /* Must break line in front of character */ guint is_char_break : 1; /* Can break here when doing char wrap */ guint is_white : 1; /* Whitespace character */ /* Cursor can appear in front of character (i.e. this is a grapheme * boundary, or the first character in the text). */ guint is_cursor_position : 1; /* Note that in degenerate cases, you could have both start/end set on * some text, most likely for sentences (e.g. no space after a period, so * the next sentence starts right away). */ guint is_word_start : 1; /* first character in a word */ guint is_word_end : 1; /* is first non-word char after a word */ /* There are two ways to divide sentences. The first assigns all * intersentence whitespace/control/format chars to some sentence, * so all chars are in some sentence; is_sentence_boundary denotes * the boundaries there. The second way doesn't assign * between-sentence spaces, etc. to any sentence, so * is_sentence_start/is_sentence_end mark the boundaries of those * sentences. */ guint is_sentence_boundary : 1; guint is_sentence_start : 1; /* first character in a sentence */ guint is_sentence_end : 1; /* first non-sentence char after a sentence */ /* If set, backspace deletes one character rather than * the entire grapheme cluster. */ guint backspace_deletes_character : 1; /* Only few space variants (U+0020 and U+00A0) have variable * width during justification. */ guint is_expandable_space : 1; /* Word boundary as defined by UAX#29 */ guint is_word_boundary : 1; /* is NOT in the middle of a word */ }; /* Determine information about cluster/word/line breaks in a string * of Unicode text. */ PANGO_AVAILABLE_IN_ALL void pango_break (const gchar *text, int length, PangoAnalysis *analysis, PangoLogAttr *attrs, int attrs_len); PANGO_AVAILABLE_IN_ALL void pango_find_paragraph_boundary (const gchar *text, gint length, gint *paragraph_delimiter_index, gint *next_paragraph_start); PANGO_AVAILABLE_IN_ALL void pango_get_log_attrs (const char *text, int length, int level, PangoLanguage *language, PangoLogAttr *log_attrs, int attrs_len); #ifdef PANGO_ENABLE_ENGINE /* This is the default break algorithm, used if no language * engine overrides it. Normally you should use pango_break() * instead; this function is mostly useful for chaining up * from a language engine override. */ PANGO_AVAILABLE_IN_ALL void pango_default_break (const gchar *text, int length, PangoAnalysis *analysis, PangoLogAttr *attrs, int attrs_len); #endif /* PANGO_ENABLE_ENGINE */ G_END_DECLS #endif /* __PANGO_BREAK_H__ */