diff --git a/packages/html2text/build.sh b/packages/html2text/build.sh
new file mode 100644
index 000000000..60fe86bf7
--- /dev/null
+++ b/packages/html2text/build.sh
@@ -0,0 +1,7 @@
+TERMUX_PKG_HOMEPAGE=http://www.mbayer.de/html2text/
+TERMUX_PKG_DESCRIPTION="Utility that converts HTML documents into plain text"
+TERMUX_PKG_LICENSE="GPL-2.0"
+TERMUX_PKG_VERSION=1.3.2
+TERMUX_PKG_SRCURL=http://www.mbayer.de/html2text/downloads/html2text-${TERMUX_PKG_VERSION}a.tar.gz # tarball name carries an extra "a" after the version
+TERMUX_PKG_SHA256=000b39d5d910b867ff7e087177b470a1e26e2819920dcffd5991c33f6d480392
+TERMUX_PKG_BUILD_IN_SRC=true
diff --git a/packages/html2text/html2text-1.3.2a_Makefile.in.patch b/packages/html2text/html2text-1.3.2a_Makefile.in.patch
new file mode 100644
index 000000000..45d959050
--- /dev/null
+++ b/packages/html2text/html2text-1.3.2a_Makefile.in.patch
@@ -0,0 +1,25 @@
+diff -uNr html2text-1.3.2a/Makefile.in html2text-1.3.2a.mod/Makefile.in
+--- html2text-1.3.2a/Makefile.in 2004-01-14 15:47:02.000000000 +0200
++++ html2text-1.3.2a.mod/Makefile.in 2020-01-19 19:44:43.131479673 +0200
+@@ -29,9 +29,9 @@
+ YFLAGS =
+
+ INSTALLER = install
+-BINDIR = /usr/local/bin
+-MANDIR = /usr/local/man
+-DOCDIR = /usr/share/doc/html2text
++BINDIR = @TERMUX_PREFIX@/bin
++MANDIR = @TERMUX_PREFIX@/share/man
++DOCDIR = @TERMUX_PREFIX@/share/doc/html2text
+
+ CXX = @CXX@
+ BOOL_DEFINITION = @BOOL_DEFINITION@
+@@ -91,7 +91,7 @@
+ # This is mostly thought for RPM builts and users that don't read the documentation.
+
+ install :
+- $(INSTALLER) -s -m 755 html2text $(BINDIR);
++ $(INSTALLER) -m 755 html2text $(BINDIR);
+ $(INSTALLER) -m 644 html2text.1.gz $(MANDIR)/man1;
+ $(INSTALLER) -m 644 html2textrc.5.gz $(MANDIR)/man5;
+ $(INSTALLER) -d -m 755 $(DOCDIR);
diff --git a/packages/html2text/html2text-1.3.2a_configure.patch b/packages/html2text/html2text-1.3.2a_configure.patch
new file mode 100644
index 000000000..38f19dbc1
--- /dev/null
+++ b/packages/html2text/html2text-1.3.2a_configure.patch
@@ -0,0 +1,147 @@
+diff -uNr html2text-1.3.2a/configure html2text-1.3.2a.mod/configure
+--- html2text-1.3.2a/configure 2004-01-12 17:47:18.000000000 +0200
++++ html2text-1.3.2a.mod/configure 2020-01-19 19:43:44.205959803 +0200
+@@ -31,24 +31,6 @@
+ #
+
+ $echo 'Checking C++ compiler... \c';
+-cat <$tmp_file.C;
+-#include
+-int main(int, char **) {
+- std::cout << "hello" << std::endl;
+- return 0;
+-}
+-EOF
+-CXX=unknown;
+-for i in "CC" "g++" "cc" "$CC"; do
+- if $i -c $tmp_file.C 2>/dev/null; then
+- CXX="$i";
+- break;
+- fi;
+-done;
+-if test "$CXX" = unknown; then
+- $echo "Error: Could not find a working C++ compiler.";
+- exit 1;
+-fi;
+ $echo "use \"$CXX\"";
+
+ #
+@@ -57,7 +39,7 @@
+
+ $echo 'Checking ... \c';
+ SYS_POLL_MISSING=unknown;
+-cat <$tmp_file.C;
++cat <$tmp_file.cc;
+ #ifdef SYS_POLL_MISSING /* { */
+ struct pollfd { int fd; short events; short revents; };
+ extern "C" int poll(struct pollfd *ufds, unsigned int nfds, int timeout);
+@@ -76,7 +58,7 @@
+ }
+ EOF
+ for i in "" -DSYS_POLL_MISSING; do
+- if $CXX $tmp_file.C $i -o $tmp_file 2>/dev/null; then
++ if $CXX $tmp_file.cc $i -o $tmp_file 2>/dev/null; then
+ SYS_POLL_MISSING="$i";
+ break;
+ fi;
+@@ -97,7 +79,7 @@
+
+ $echo 'Checking for socket libraries... \c';
+ SOCKET_LIBRARIES=unknown;
+-cat >$tmp_file.C <$tmp_file.cc </dev/null; then
++ if $CXX $tmp_file.cc $i -o $tmp_file 2>/dev/null; then
+ SOCKET_LIBRARIES="$i";
+ break;
+ fi;
+@@ -128,7 +110,7 @@
+
+ $echo 'Checking "bool"... \c';
+ BOOL_DEFINITION=unknown;
+-cat <$tmp_file.C;
++cat <$tmp_file.cc;
+ #ifdef BOOL_DEFINITION
+ BOOL_DEFINITION
+ #endif
+@@ -144,7 +126,7 @@
+ '-DBOOL_DEFINITION="typedef unsigned char bool;const bool false=0,true=1;"' \
+ '-DBOOL_DEFINITION="enum bool{false,true};"'; \
+ do
+- if eval "$CXX $tmp_file.C $i -o $tmp_file 2>/dev/null"; then
++ if eval "$CXX $tmp_file.cc $i -o $tmp_file 2>/dev/null"; then
+ BOOL_DEFINITION="$i";
+ break;
+ fi;
+@@ -165,7 +147,7 @@
+
+ $echo 'Checking "explicit"... \c';
+ EXPLICIT=unknown;
+-cat <$tmp_file.C;
++cat <$tmp_file.cc;
+ struct C {
+ explicit C(int) {}
+ };
+@@ -175,7 +157,7 @@
+ '' \
+ '-Dexplicit='; \
+ do
+- if eval "$CXX $tmp_file.C $i -o $tmp_file 2>/dev/null"; then
++ if eval "$CXX $tmp_file.cc $i -o $tmp_file 2>/dev/null"; then
+ EXPLICIT="$i";
+ break;
+ fi;
+@@ -195,7 +177,7 @@
+ #
+
+ $echo 'Checking Standard C++ library... \c';
+-cat <$tmp_file.C;
++cat <$tmp_file.cc;
+ #include
+ #include
+ #include
+@@ -207,7 +189,7 @@
+ using namespace std;
+ void func() { map x; }
+ EOF
+-if $CXX -c $tmp_file.C 2>/dev/null; then
++if $CXX -c $tmp_file.cc 2>/dev/null; then
+ LIBSTDCXX_INCLUDES="";
+ LIBSTDCXX_LIBS="";
+ $echo 'works; no need to make "./libstd"';
+@@ -223,7 +205,7 @@
+ #
+ AUTO_PTR_BROKEN="";
+ $echo 'Checking "auto_ptr"... \c';
+-cat <$tmp_file.C;
++cat <$tmp_file.cc;
+ #include
+ #include
+ #include
+@@ -243,7 +225,7 @@
+ return 0;
+ }
+ EOF
+-if eval "$CXX -c $LIBSTDCXX_INCLUDES $EXPLICIT $BOOL_DEFINITION $tmp_file.C" 2>/dev/null; then
++if eval "$CXX -c $LIBSTDCXX_INCLUDES $EXPLICIT $BOOL_DEFINITION $tmp_file.cc" 2>/dev/null; then
+ $echo 'defined in , good';
+ else
+ $echo 'not defined or not working, use "./libstd/include/auto_ptr.h"';
+@@ -255,8 +237,8 @@
+ #
+ MAKEDEPEND_INCLUDES="";
+ $echo 'Checking "makedepend" includes... \c';
+-echo "#include " >$tmp_file.C;
+-MAKEDEPEND_INCLUDES=`$CXX -E $tmp_file.C 2>/dev/null |
++echo "#include " >$tmp_file.cc;
++MAKEDEPEND_INCLUDES=`$CXX -E $tmp_file.cc 2>/dev/null |
+ sed -n \
+ -e 's/^#line .*"\(\/.*\)\/.*".*/-I\1/p' \
+ -e 's/^# [1-9][0-9]* "\(\/.*\)\/.*".*/-I\1/p' |
diff --git a/packages/html2text/patch-utf8-html2text-1.3.2a.patch b/packages/html2text/patch-utf8-html2text-1.3.2a.patch
new file mode 100644
index 000000000..442d1871e
--- /dev/null
+++ b/packages/html2text/patch-utf8-html2text-1.3.2a.patch
@@ -0,0 +1,706 @@
+diff -r -u -bB html2text-1.3.2a/Area.C html2text-1.3.2a-patched/Area.C
+--- html2text-1.3.2a/Area.C 2003-11-23 12:05:29.000000000 +0100
++++ html2text-1.3.2a-patched/Area.C 2005-05-13 22:19:59.862137688 +0200
+@@ -36,10 +36,13 @@
+ #include
+
+ #include "Area.h"
++#include "html.h"
+ #include "string.h"
+
+ #define LATIN1_nbsp 160
+
++extern int use_encoding;
++
+ /* ------------------------------------------------------------------------- */
+
+ #define malloc_array(type, size)\
+@@ -81,6 +84,27 @@
+
+ /* ------------------------------------------------------------------------- */
+
++/* utf_length() and utf_width()
++ *
++ * Very simplified algorithm of calculating length of UTF-8
++ * string. No check for errors. Counting only ASCII bytes and
++ * leading bytes of UTF-8 multibyte sequences. All bytes like
++ * 10xxxxxx are dropped. If USE_UTF8 is false then returns
++ * usual length. --YS
++ */
++
++unsigned int
++Line::utf_length(size_type f, size_type t) const
++{
++ size_type m = (t < length_ ? t : length_);
++ size_type r = m - f;
++ if(USE_UTF8) {
++ for (int i = f; i < m; i++)
++ if((cells_[i].character & 0xc0) == 0x80) r--;
++ }
++ return r;
++}
++
+ void
+ Line::resize(size_type l)
+ {
+@@ -236,6 +260,23 @@
+ return *this;
+ }
+
++/* Widest row in characters; in UTF-8 mode trailing whitespace and continuation bytes (10xxxxxx) are not counted. */
++unsigned int Area::utf_width()
++{
++  if (!USE_UTF8) return width_;
++  size_type r = 0;
++  for (size_type yy = 0; yy < height_; yy++) {
++    size_type r1 = 0;
++    for (int i = width_ - 1; i >= 0; i--) {
++      /* cast: isspace() is undefined for negative (high-bit) char values */
++      if(!r1 && isspace((unsigned char) cells_[yy][i].character)) continue;
++      if((cells_[yy][i].character & 0xc0) != 0x80) r1++;
++    }
++    if(r < r1) r = r1;
++  }
++  return r;
++}
++
+ void
+ Area::resize(size_type w, size_type h)
+ {
+@@ -439,7 +480,7 @@
+ char c = p->character;
+ char a = p->attribute;
+
+- if (c == (char) LATIN1_nbsp) c = ' ';
++ if (c == (char) LATIN1_nbsp && !USE_UTF8) c = ' ';
+
+ if (a == Cell::NONE) {
+ os << c;
+Only in html2text-1.3.2a-patched/: Area.C.orig.
+diff -r -u -bB html2text-1.3.2a/Area.h html2text-1.3.2a-patched/Area.h
+--- html2text-1.3.2a/Area.h 2003-11-23 12:05:29.000000000 +0100
++++ html2text-1.3.2a-patched/Area.h 2005-05-13 22:19:59.863137536 +0200
+@@ -81,6 +81,8 @@
+ Cell &operator[](size_type x) { return cells_[x]; }
+ const Cell *cells() const { return cells_; }
+
++ unsigned int utf_length(size_type f, size_type t) const;
++
+ void resize(size_type l);
+ void enlarge(size_type l) { if (l > length_) resize(l); }
+
+@@ -134,6 +136,8 @@
+ Cell *operator[](size_type y) { return cells_[y]; }
+ const Area &operator>>=(size_type rs);
+
++ unsigned int utf_width();
++
+ void resize(size_type w, size_type h);
+ void enlarge(size_type w, size_type h);
+
+Only in html2text-1.3.2a-patched/: Area.h.orig.
+diff -r -u -bB html2text-1.3.2a/format.C html2text-1.3.2a-patched/format.C
+--- html2text-1.3.2a/format.C 2003-11-23 12:05:29.000000000 +0100
++++ html2text-1.3.2a-patched/format.C 2005-05-13 22:19:59.865137232 +0200
+@@ -1210,6 +1210,7 @@
+ }
+
+ Line::size_type to = from + 1;
++ int to_from;
+
+ Line::size_type lbp = (Line::size_type) -1; // "Last break position".
+
+@@ -1238,18 +1239,20 @@
+ to++;
+ }
+
+- if (to - from > w && lbp != (Area::size_type) -1) { to = lbp; break; }
++ if (line.utf_length(from,to) > w && lbp != (Area::size_type) -1)
++ { to = lbp; break; }
+ }
+
++ to_from = line.utf_length(from,to);
+ /*
+ * Copy the "from...to" range from the "line" to the bottom of the "res"
+ * Area.
+ */
+ Area::size_type x = 0;
+ Area::size_type len = to - from;
+- if (halign == Area::LEFT || len >= w) { ; } else
+- if (halign == Area::CENTER) { x += (w - len) / 2; } else
+- if (halign == Area::RIGHT) { x += w - len; }
++ if (halign == Area::LEFT || to_from >= w) { ; } else
++ if (halign == Area::CENTER) { x += (w - to_from) / 2; } else
++ if (halign == Area::RIGHT) { x += w - to_from; }
+ res->insert(line.cells() + from, len, x, res->height());
+
+ /*
+Only in html2text-1.3.2a-patched/: format.C.orig.
+diff -r -u -bB html2text-1.3.2a/html2text.C html2text-1.3.2a-patched/html2text.C
+--- html2text-1.3.2a/html2text.C 2003-11-23 12:05:29.000000000 +0100
++++ html2text-1.3.2a-patched/html2text.C 2005-05-13 22:19:59.868136776 +0200
+@@ -148,9 +148,10 @@
+ -o Redirect output into \n\
+ -nobs Do not use backspaces for boldface and underlining\n\
+ -ascii Use plain ASCII for output instead of ISO-8859-1\n\
++ -utf8 Assume both terminal and input stream are in UTF-8 mode\n\
+ ";
+
+-int use_iso8859 = 1;
++int use_encoding = ISO8859;
+
+ int
+ main(int argc, char **argv)
+@@ -199,7 +200,8 @@
+ if (!strcmp(arg, "-width" )) { width = atoi(argv[++i]); } else
+ if (!strcmp(arg, "-o" )) { output_file_name = argv[++i]; } else
+ if (!strcmp(arg, "-nobs" )) { use_backspaces = false; } else
+- if (!strcmp(arg, "-ascii" )) { use_iso8859 = false; } else
++ if (!strcmp(arg, "-ascii" )) { use_encoding = ASCII; } else
++ if (!strcmp(arg, "-utf8" )) { use_encoding = UTF8; } else
+ {
+ std::cerr
+ << "Unrecognized command line option \""
+Only in html2text-1.3.2a-patched/: html2text.C.orig.
+diff -r -u -bB html2text-1.3.2a/html.h html2text-1.3.2a-patched/html.h
+--- html2text-1.3.2a/html.h 2001-10-04 22:03:54.000000000 +0200
++++ html2text-1.3.2a-patched/html.h 2005-05-13 22:19:59.866137080 +0200
+@@ -61,6 +61,11 @@
+
+ /* ------------------------------------------------------------------------- */
+
++enum {ASCII, ISO8859, UTF8};
++#define USE_ISO8859 (use_encoding == ISO8859)
++#define USE_ASCII (use_encoding == ASCII)
++#define USE_UTF8 (use_encoding == UTF8)
++
+ #define LATIN1_nbsp 160
+ #define LATIN1_iexcl 161
+ #define LATIN1_cent 162
+diff -r -u -bB html2text-1.3.2a/sgml.C html2text-1.3.2a-patched/sgml.C
+--- html2text-1.3.2a/sgml.C 2003-11-23 12:09:11.000000000 +0100
++++ html2text-1.3.2a-patched/sgml.C 2005-05-13 22:19:59.870136472 +0200
+@@ -62,261 +62,280 @@
+ char name[8];
+ int iso8859code;
+ char *asciistr;
++ unsigned long unicode;
+ } entities[] = {
+- { "AElig", LATIN1_AElig, "AE" },
+- { "AMP", 0, "&" },
+- { "Aacute", LATIN1_Aacute, "A'" },
+- { "Acirc", LATIN1_Acirc, "A^" },
+- { "Agrave", LATIN1_Agrave, "A`" },
+- { "Alpha", 0, "A" },
+- { "Aring", LATIN1_Aring, "AA" },
+- { "Atilde", LATIN1_Atilde, "A~" },
+- { "Auml", LATIN1_Auml, "A\"" },
+- { "Beta", 0, "B" },
+- { "Ccedil", LATIN1_Ccedil, "C," },
+- { "Chi", 0, "H" },
+- { "Dagger", 0, "++" },
+- { "Delta", 0, "D" },
+- { "ETH", LATIN1_ETH, "D-" },
+- { "Eacute", LATIN1_Eacute, "E'" },
+- { "Ecirc", LATIN1_Ecirc, "E^" },
+- { "Egrave", LATIN1_Egrave, "E`" },
+- { "Epsilon", 0, "E" },
+- { "Eta", 0, "E" },
+- { "Euml", LATIN1_Euml, "E\"" },
+- { "GT", 0, ">" },
+- { "Gamma", 0, "G" },
+- { "Iacute", LATIN1_Iacute, "I'" },
+- { "Icirc", LATIN1_Icirc, "I^" },
+- { "Igrave", LATIN1_Igrave, "I`" },
+- { "Iota", 0, "I" },
+- { "Iuml", LATIN1_Iuml, "I\"" },
+- { "Kappa", 0, "K" },
+- { "LT", 0, "<" },
+- { "Lambda", 0, "L" },
+- { "Mu", 0, "M" },
+- { "Ntilde", LATIN1_Ntilde, "N~" },
+- { "Nu", 0, "N" },
+- { "OElig", 0, "OE" },
+- { "Oacute", LATIN1_Oacute, "O'" },
+- { "Ocirc", LATIN1_Ocirc, "O^" },
+- { "Ograve", LATIN1_Ograve, "O`" },
+- { "Omega", 0, "O" },
+- { "Omicron", 0, "O" },
+- { "Oslash", LATIN1_Oslash, "O/" },
+- { "Otilde", LATIN1_Otilde, "O~" },
+- { "Ouml", LATIN1_Ouml, "O\"" },
+- { "Phi", 0, "F" },
+- { "Pi", 0, "P" },
+- { "Prime", 0, "''" },
+- { "Psi", 0, "PS" },
+- { "QUOT", 0, "\"" },
+- { "Rho", 0, "R" },
+- { "Scaron", 0, "S" },
+- { "Sigma", 0, "S" },
+- { "THORN", LATIN1_THORN, "TH" },
+- { "Tau", 0, "T" },
+- { "Theta", 0, "TH" },
+- { "Uacute", LATIN1_Uacute, "U'" },
+- { "Ucirc", LATIN1_Ucirc, "U^" },
+- { "Ugrave", LATIN1_Ugrave, "U`" },
+- { "Upsilon", 0, "U" },
+- { "Uuml", LATIN1_Uuml, "U\"" },
+- { "Xi", 0, "X" },
+- { "Yacute", LATIN1_Yacute, "Y'" },
+- { "Yuml", 0, "Y\"" },
+- { "Zeta", 0, "Z" },
+- { "aacute", LATIN1_aacute, "a'" },
+- { "acirc", LATIN1_acirc, "a^" },
+- { "acute", LATIN1_acute, "'" },
+- { "aelig", LATIN1_aelig, "ae" },
+- { "agrave", LATIN1_agrave, "a`" },
++ { "AElig", LATIN1_AElig, "AE", 0x00c6},
++ { "AMP", 0, "&", 0x0026},
++ { "Aacute", LATIN1_Aacute, "A'", 0x00c1},
++ { "Acirc", LATIN1_Acirc, "A^", 0x00c2},
++ { "Agrave", LATIN1_Agrave, "A`", 0x00c0},
++ { "Alpha", 0, "A", 0x0391},
++ { "Aring", LATIN1_Aring, "AA", 0x00c5},
++ { "Atilde", LATIN1_Atilde, "A~", 0x00c3},
++ { "Auml", LATIN1_Auml, "A\"", 0x00c4},
++ { "Beta", 0, "B", 0x0392},
++ { "Ccedil", LATIN1_Ccedil, "C,", 0x00c7},
++ { "Chi", 0, "H", 0x03a7},
++ { "Dagger", 0, "++", 0x2021}, /* U+2021 double dagger (U+2020 is the single "dagger") */
++ { "Delta", 0, "D", 0x0394},
++ { "ETH", LATIN1_ETH, "D-", 0x00d0},
++ { "Eacute", LATIN1_Eacute, "E'", 0x00c9},
++ { "Ecirc", LATIN1_Ecirc, "E^", 0x00ca},
++ { "Egrave", LATIN1_Egrave, "E`", 0x00c8},
++ { "Epsilon", 0, "E", 0x0395},
++ { "Eta", 0, "E", 0x0397},
++ { "Euml", LATIN1_Euml, "E\"", 0x00cb},
++ { "GT", 0, ">", 0x003e},
++ { "Gamma", 0, "G", 0x0393},
++ { "Iacute", LATIN1_Iacute, "I'", 0x00cd},
++ { "Icirc", LATIN1_Icirc, "I^", 0x00ce},
++ { "Igrave", LATIN1_Igrave, "I`", 0x00cc},
++ { "Iota", 0, "I", 0x0399},
++ { "Iuml", LATIN1_Iuml, "I\"", 0x00cf},
++ { "Kappa", 0, "K", 0x039a},
++ { "LT", 0, "<", 0x003c},
++ { "Lambda", 0, "L", 0x039b},
++ { "Mu", 0, "M", 0x039c},
++ { "Ntilde", LATIN1_Ntilde, "N~", 0x00d1},
++ { "Nu", 0, "N", 0x039d},
++ { "OElig", 0, "OE", 0x0152},
++ { "Oacute", LATIN1_Oacute, "O'", 0x00d3},
++ { "Ocirc", LATIN1_Ocirc, "O^", 0x00d4},
++ { "Ograve", LATIN1_Ograve, "O`", 0x00d2},
++ { "Omega", 0, "O", 0x03a9},
++ { "Omicron", 0, "O", 0x039f},
++ { "Oslash", LATIN1_Oslash, "O/", 0x00d8},
++ { "Otilde", LATIN1_Otilde, "O~", 0x00d5},
++ { "Ouml", LATIN1_Ouml, "O\"", 0x00d6},
++ { "Phi", 0, "F", 0x03a6},
++ { "Pi", 0, "P", 0x03a0},
++ { "Prime", 0, "''", 0x2033}, /* U+2033 double prime; was missing, so never replaced in UTF-8 mode */
++ { "Psi", 0, "PS", 0x03a8},
++ { "QUOT", 0, "\"", 0x0022}, /* U+0022 quotation mark; was missing */
++ { "Rho", 0, "R", 0x03a1},
++ { "Scaron", 0, "S", 0x0160}, /* U+0160 capital S with caron (U+0161 is lowercase "scaron") */
++ { "Sigma", 0, "S", 0x03a3},
++ { "THORN", LATIN1_THORN, "TH", 0x00de},
++ { "Tau", 0, "T", 0x03a4},
++ { "Theta", 0, "TH", 0x0398},
++ { "Uacute", LATIN1_Uacute, "U'", 0x00da},
++ { "Ucirc", LATIN1_Ucirc, "U^", 0x00db},
++ { "Ugrave", LATIN1_Ugrave, "U`", 0x00d9},
++ { "Upsilon", 0, "U", 0x03a5},
++ { "Uuml", LATIN1_Uuml, "U\"", 0x00dc},
++ { "Xi", 0, "X", 0x039e},
++ { "Yacute", LATIN1_Yacute, "Y'", 0x00dd},
++ { "Yuml", 0, "Y\"", 0x0178},
++ { "Zeta", 0, "Z", 0x0396},
++ { "aacute", LATIN1_aacute, "a'", 0x00e1},
++ { "acirc", LATIN1_acirc, "a^", 0x00e2},
++ { "acute", LATIN1_acute, "'", 0x00b4},
++ { "aelig", LATIN1_aelig, "ae", 0x00e6},
++ { "agrave", LATIN1_agrave, "a`", 0x00e0},
+ { "alefsym", 0, "Aleph" },
+- { "alpha", 0, "a" },
++ { "alpha", 0, "a", 0x03b1},
+ { "amp", 0, "&" },
+ { "and", 0, "AND" },
+ { "ang", 0, "-V" },
+ { "apos", 0, "'" },
+- { "aring", LATIN1_aring, "aa" },
+- { "asymp", 0, "~=" },
+- { "atilde", LATIN1_atilde, "a~" },
+- { "auml", LATIN1_auml, "a\"" },
++ { "aring", LATIN1_aring, "aa", 0x00e5},
++ { "asymp", 0, "~=", 0x2248},
++ { "atilde", LATIN1_atilde, "a~", 0x00e3},
++ { "auml", LATIN1_auml, "a\"", 0x00e4}, /* U+00E4 a with diaeresis (U+00E5 is "aring") */
+ { "bdquo", 0, "\"" },
+- { "beta", 0, "b" },
+- { "brvbar", LATIN1_brvbar, "|" },
+- { "bull", 0, " o " },
++ { "beta", 0, "b", 0x03b2},
++ { "brvbar", LATIN1_brvbar, "|", 0x00a6},
++ { "bull", 0, " o ", 0x2022},
+ { "cap", 0, "(U" },
+- { "ccedil", LATIN1_ccedil, "c," },
+- { "cedil", LATIN1_cedil, "," },
+- { "cent", LATIN1_cent, "-c-" },
+- { "chi", 0, "h" },
+- { "circ", 0, "^" },
++ { "ccedil", LATIN1_ccedil, "c,", 0x00e7},
++ { "cedil", LATIN1_cedil, ",", 0x00b8},
++ { "cent", LATIN1_cent, "-c-", 0x00a2},
++ { "chi", 0, "h", 0x03c7},
++ { "circ", 0, "^", 0x02c6}, /* U+02C6 modifier letter circumflex, matching "tilde" -> U+02DC */
+ // { "clubs", 0, "[clubs]" },
+ { "cong", 0, "?=" },
+- { "copy", LATIN1_copy, "(c)" },
++ { "copy", LATIN1_copy, "(c)", 0x00a9},
+ { "crarr", 0, "<-'" },
+ { "cup", 0, ")U" },
+- { "curren", LATIN1_curren, "CUR" },
++ { "curren", LATIN1_curren, "CUR", 0x00a4},
+ { "dArr", 0, "vv" },
+- { "dagger", 0, "+" },
++ { "dagger", 0, "+", 0x2020},
+ { "darr", 0, "v" },
+- { "deg", LATIN1_deg, "DEG" },
+- { "delta", 0, "d" },
++ { "deg", LATIN1_deg, "DEG", 0x00b0},
++ { "delta", 0, "d", 0x03b4},
+ // { "diams", 0, "[diamonds]" },
+- { "divide", LATIN1_divide, "/" },
+- { "eacute", LATIN1_eacute, "e'" },
+- { "ecirc", LATIN1_ecirc, "e^" },
+- { "egrave", LATIN1_egrave, "e`" },
++ { "divide", LATIN1_divide, "/", 0x00f7},
++ { "eacute", LATIN1_eacute, "e'", 0x00e9},
++ { "ecirc", LATIN1_ecirc, "e^", 0x00ea},
++ { "egrave", LATIN1_egrave, "e`", 0x00e8},
+ { "empty", 0, "{}" },
+- { "epsilon", 0, "e" },
+- { "equiv", 0, "==" },
+- { "eta", 0, "e" },
+- { "eth", LATIN1_eth, "d-" },
+- { "euml", LATIN1_euml, "e\"" },
+- { "euro", 0, "EUR" },
++ { "epsilon", 0, "e", 0x03b5},
++ { "equiv", 0, "==", 0x2261},
++ { "eta", 0, "e", 0x03b7},
++ { "eth", LATIN1_eth, "d-", 0x00f0},
++ { "euml", LATIN1_euml, "e\"", 0x00eb},
++ { "euro", 0, "EUR", 0x20ac},
+ { "exist", 0, "TE" },
+ { "fnof", 0, "f" },
+ { "forall", 0, "FA" },
+- { "frac12", LATIN1_frac12, " 1/2" },
+- { "frac14", LATIN1_frac14, " 1/4" },
+- { "frac34", LATIN1_frac34, " 3/4" },
++ { "frac12", LATIN1_frac12, " 1/2",0x00bd},
++ { "frac14", LATIN1_frac14, " 1/4",0x00bc},
++ { "frac34", LATIN1_frac34, " 3/4",0x00be},
+ { "frasl", 0, "/" },
+- { "gamma", 0, "g" },
+- { "ge", 0, ">=" },
+- { "gt", 0, ">" },
++ { "gamma", 0, "g", 0x03b3},
++ { "ge", 0, ">=", 0x2265},
++ { "gt", 0, ">", 0x003e},
+ { "hArr", 0, "<=>" },
+ { "harr", 0, "<->" },
+ // { "hearts", 0, "[hearts]" },
+- { "hellip", 0, "..." },
+- { "iacute", LATIN1_iacute, "i'" },
+- { "icirc", LATIN1_icirc, "i^" },
+- { "iexcl", LATIN1_iexcl, "!" },
+- { "igrave", LATIN1_igrave, "i`" },
++ { "hellip", 0, "...", 0x2026},
++ { "iacute", LATIN1_iacute, "i'", 0x00ed},
++ { "icirc", LATIN1_icirc, "i^", 0x00ee},
++ { "iexcl", LATIN1_iexcl, "!", 0x00a1},
++ { "igrave", LATIN1_igrave, "i`", 0x00ec},
+ { "image", 0, "Im" },
+- { "infin", 0, "oo" },
+- { "int", 0, "INT" },
+- { "iota", 0, "i" },
+- { "iquest", LATIN1_iquest, "?" },
++ { "infin", 0, "oo", 0x221e},
++ { "int", 0, "INT", 0x222b},
++ { "iota", 0, "i", 0x03b9},
++ { "iquest", LATIN1_iquest, "?", 0x00bf},
+ { "isin", 0, "(-" },
+- { "iuml", LATIN1_iuml, "i\"" },
+- { "kappa", 0, "k" },
++ { "iuml", LATIN1_iuml, "i\"", 0x00ef},
++ { "kappa", 0, "k", 0x03ba},
+ { "lArr", 0, "<=" },
+- { "lambda", 0, "l" },
++ { "lambda", 0, "l", 0x03bb},
+ { "lang", 0, "" },
+ { "laquo", LATIN1_laquo, "<<" },
+- { "larr", 0, "<-" },
++ { "larr", 0, "<-", 0x2190},
+ // { "lceil", 0, "<|" },
+ { "ldquo", 0, "\"" },
+- { "le", 0, "<=" },
++ { "le", 0, "<=", 0x2264},
+ // { "lfloor", 0, "|<" },
+ { "lowast", 0, "*" },
+ { "loz", 0, "<>" },
+ { "lsaquo", 0, "<" },
+ { "lsquo", 0, "`" },
+- { "lt", 0, "<" },
+- { "macr", LATIN1_macr, "-" },
++ { "lt", 0, "<", 0x003c},
++ { "macr", LATIN1_macr, "-", 0x00af},
+ { "mdash", 0, "--" },
+- { "micro", LATIN1_micro, "my" },
+- { "middot", LATIN1_middot, "." },
+- { "minus", 0, "-" },
+- { "mu", 0, "m" },
++ { "micro", LATIN1_micro, "my", 0x00b5},
++ { "middot", LATIN1_middot, ".", 0x00b7},
++ { "minus", 0, "-", 0x2212},
++ { "mu", 0, "m", 0x03bc},
+ { "nabla", 0, "Nabla" },
+- { "nbsp", LATIN1_nbsp, " " },
++ { "nbsp", LATIN1_nbsp, " ", 0x00a0},
+ { "ndash", 0, "-" },
+- { "ne", 0, "!=" },
++ { "ne", 0, "!=", 0x2260},
+ { "ni", 0, "-)" },
+ { "not", LATIN1_not, "NOT" },
+ { "notin", 0, "!(-" },
+ { "nsub", 0, "!(C" },
+- { "ntilde", LATIN1_ntilde, "n~" },
+- { "nu", 0, "n" },
+- { "oacute", LATIN1_oacute, "o'" },
+- { "ocirc", LATIN1_ocirc, "o^" },
++ { "ntilde", LATIN1_ntilde, "n~", 0x00f1},
++ { "nu", 0, "n", 0x03bd},
++ { "oacute", LATIN1_oacute, "o'", 0x00f3},
++ { "ocirc", LATIN1_ocirc, "o^", 0x00f4},
+ { "oelig", 0, "oe" },
+- { "ograve", LATIN1_ograve, "o`" },
++ { "ograve", LATIN1_ograve, "o`", 0x00f2},
+ { "oline", LATIN1_macr, "-" },
+- { "omega", 0, "o" },
+- { "omicron", 0, "o" },
++ { "omega", 0, "o", 0x03c9},
++ { "omicron", 0, "o", 0x03bf},
+ { "oplus", 0, "(+)" },
+ { "or", 0, "OR" },
+- { "ordf", LATIN1_ordf, "-a" },
+- { "ordm", LATIN1_ordm, "-o" },
+- { "oslash", LATIN1_oslash, "o/" },
+- { "otilde", LATIN1_otilde, "o~" },
++ { "ordf", LATIN1_ordf, "-a", 0x00aa},
++ { "ordm", LATIN1_ordm, "-o", 0x00ba},
++ { "oslash", LATIN1_oslash, "o/", 0x00f8},
++ { "otilde", LATIN1_otilde, "o~", 0x00f5},
+ { "otimes", 0, "(x)" },
+- { "ouml", LATIN1_ouml, "o\"" },
+- { "para", LATIN1_para, "P:" },
+- { "part", 0, "PART" },
+- { "permil", 0, " 0/00" },
++ { "ouml", LATIN1_ouml, "o\"", 0x00f6},
++ { "para", LATIN1_para, "P:", 0x00b6},
++ { "part", 0, "PART",0x2202},
++ { "permil", 0, " 0/00",0x2030},
+ { "perp", 0, "-T" },
+- { "phi", 0, "f" },
+- { "pi", 0, "p" },
++ { "phi", 0, "f", 0x03c6},
++ { "pi", 0, "p", 0x03c0},
+ { "piv", 0, "Pi" },
+- { "plusmn", LATIN1_plusmn, "+/-" },
+- { "pound", LATIN1_pound, "-L-" },
++ { "plusmn", LATIN1_plusmn, "+/-", 0x00b1},
++ { "pound", LATIN1_pound, "-L-", 0x00a3},
+ { "prime", 0, "'" },
+- { "prod", 0, "PROD" },
++ { "prod", 0, "PROD",0x220f},
+ { "prop", 0, "0(" },
+- { "psi", 0, "ps" },
++ { "psi", 0, "ps", 0x03c8},
+ { "quot", 0, "\"" },
+ { "rArr", 0, "=>" },
+- { "radic", 0, "SQRT" },
++ { "radic", 0, "SQRT",0x221a},
+ { "rang", 0, "/>" },
+ { "raquo", LATIN1_raquo, ">>" },
+- { "rarr", 0, "->" },
++ { "rarr", 0, "->", 0x2192},
+ // { "rceil", 0, ">|" },
+ { "rdquo", 0, "\"" },
+ { "real", 0, "Re" },
+- { "reg", LATIN1_reg, "(R)" },
++ { "reg", LATIN1_reg, "(R)", 0x00ae},
+ // { "rfloor", 0, "|>" },
+- { "rho", 0, "r" },
++ { "rho", 0, "r", 0x03c1},
+ { "rsaquo", 0, ">" },
+ { "rsquo", 0, "'" },
+ { "sbquo", 0, "'" },
+- { "scaron", 0, "s" },
++ { "scaron", 0, "s", 0x0161},
+ { "sdot", 0, "DOT" },
+- { "sect", LATIN1_sect, "S:" },
++ { "sect", LATIN1_sect, "S:", 0x00a7},
+ { "shy", LATIN1_shy, "" },
+- { "sigma", 0, "s" },
+- { "sigmaf", 0, "s" },
++ { "sigma", 0, "s", 0x03c3},
++ { "sigmaf", 0, "s", 0x03c2},
+ { "sim", 0, "~" },
+ // { "spades", 0, "[spades]" },
+ { "sub", 0, "(C" },
+ { "sube", 0, "(_" },
+- { "sum", 0, "SUM" },
++ { "sum", 0, "SUM", 0x2211},
+ { "sup", 0, ")C" },
+- { "sup1", LATIN1_sup1, "^1" },
+- { "sup2", LATIN1_sup2, "^2" },
+- { "sup3", LATIN1_sup3, "^3" },
++ { "sup1", LATIN1_sup1, "^1", 0x00b9},
++ { "sup2", LATIN1_sup2, "^2", 0x00b2},
++ { "sup3", LATIN1_sup3, "^3", 0x00b3},
+ { "supe", 0, ")_" },
+- { "szlig", LATIN1_szlig, "ss" },
+- { "tau", 0, "t" },
++ { "szlig", LATIN1_szlig, "ss", 0x00df},
++ { "tau", 0, "t", 0x03c4},
+ { "there4", 0, ".:" },
+- { "theta", 0, "th" },
+- { "thorn", LATIN1_thorn, "th" },
+- { "tilde", 0, "~" },
+- { "times", LATIN1_times, "x" },
+- { "trade", 0, "[TM]" },
++ { "theta", 0, "th", 0x03b8},
++ { "thorn", LATIN1_thorn, "th", 0x00fe},
++ { "tilde", 0, "~", 0x02dc},
++ { "times", LATIN1_times, "x", 0x00d7},
++ { "trade", 0, "[TM]",0x2122},
+ { "uArr", 0, "^^" },
+- { "uacute", LATIN1_uacute, "u'" },
++ { "uacute", LATIN1_uacute, "u'", 0x00fa},
+ { "uarr", 0, "^" },
+- { "ucirc", LATIN1_ucirc, "u^" },
+- { "ugrave", LATIN1_ugrave, "u`" },
+- { "uml", LATIN1_uml, "\"" },
+- { "upsilon", 0, "u" },
+- { "uuml", LATIN1_uuml, "u\"" },
++ { "ucirc", LATIN1_ucirc, "u^", 0x00fb},
++ { "ugrave", LATIN1_ugrave, "u`", 0x00f9},
++ { "uml", LATIN1_uml, "\"", 0x00a8},
++ { "upsilon", 0, "u", 0x03c5},
++ { "uuml", LATIN1_uuml, "u\"", 0x00fc},
+ { "weierp", 0, "P" },
+- { "xi", 0, "x" },
+- { "yacute", LATIN1_yacute, "y'" },
+- { "yen", LATIN1_yen, "YEN" },
+- { "yuml", LATIN1_yuml, "y\"" },
+- { "zeta", 0, "z" },
++ { "xi", 0, "x", 0x03be},
++ { "yacute", LATIN1_yacute, "y'", 0x00fd},
++ { "yen", LATIN1_yen, "YEN", 0x00a5},
++ { "yuml", LATIN1_yuml, "y\"", 0x00ff},
++ { "zeta", 0, "z", 0x03b6},
+ };
+
+-extern int use_iso8859;
++extern int use_encoding;
+
+ /* ------------------------------------------------------------------------- */
+
++char ubuf[5];  /* up to 4 UTF-8 bytes plus the terminating NUL */
++
++/* Encode code point x as NUL-terminated UTF-8 in the shared buffer ubuf. */
++char *mkutf(unsigned long x)
++{
++  /* Values beyond the Unicode range become U+FFFD (replacement character). */
++  if (x > 0x10ffff) x = 0xfffd;
++  int n = (x < 0x80) ? 1 : (x < 0x800) ? 2 : (x < 0x10000) ? 3 : 4;
++  static const unsigned char lead[] = { 0, 0x00, 0xc0, 0xe0, 0xf0 };
++  memset(ubuf, 0, sizeof ubuf);
++  for (int i = n - 1; i > 0; i--) {
++    ubuf[i] = (char) (0x80 | (x & 0x3f));  /* continuation byte 10xxxxxx */
++    x >>= 6;
++  }
++  ubuf[0] = (char) (lead[n] | x);  /* lead byte holds the remaining high bits */
++  return ubuf;
++}
++
+ void
+ replace_sgml_entities(string *s)
+ {
+@@ -330,9 +349,9 @@
+ */
+ while (j < l && s->at(j) != '&') ++j;
+ /*
+- * We could convert high-bit chars to "é" here if use_iso8859
+- * is off, then let them be translated or not. Is the purpose of
+- * !use_iso8859 to allow SGML entities to be seen, or to strongly
++ * We could convert high-bit chars to "é" here if USE_ASCII
++ * is on, then let them be translated or not. Is the purpose of
++ * USE_ASCII to allow SGML entities to be seen, or to strongly
+ * filter against high-ASCII chars that might blow up a terminal
+ * that doesn't speak ISO8859? For the moment, "allow SGML entities
+ * to be seen" -- no filtering here.
+@@ -370,7 +389,11 @@
+ if (!isdigit(c)) break;
+ x = 10 * x + c - '0';
+ }
+- if (use_iso8859 || (x < 128)) {
++ if (USE_UTF8) {
++ s->replace(beg, j - beg, mkutf(x));
++ j = beg + 1;
++ }
++ else if (USE_ISO8859 && (x < 256) || USE_ASCII && (x < 128)) {
+ s->replace(beg, j - beg, 1, (char) x);
+ j = beg + 1;
+ } else {
+@@ -408,13 +431,18 @@
+ (int (*)(const void *, const void *)) strcmp
+ );
+ if (entity != NULL) {
+- if (use_iso8859 && entity->iso8859code) {
++ /* Prefer the native form for the selected encoding; every mode falls
++ * back to the ASCII transliteration (e.g. "&amp;" -> "&"). */
++ if (USE_ISO8859 && entity->iso8859code) {
+ s->replace(beg, j - beg, 1, (char) entity->iso8859code);
+ j = beg + 1;
++ } else if (USE_UTF8 && entity->unicode) {
++ s->replace(beg, j - beg, mkutf(entity->unicode));
++ j = beg + 1;
+ } else if (entity->asciistr) {
+ s->replace(beg, j - beg, entity->asciistr);
+ j = beg + 1;
+ } /* else don't replace it at all, we don't have a translation */
+ }
+ } else {
+ ; /* EXTENSION: Allow literal '&' sometimes. */
+diff -r -u -bB html2text-1.3.2a/table.C html2text-1.3.2a-patched/table.C
+--- html2text-1.3.2a/table.C 2002-07-22 13:32:50.000000000 +0200
++++ html2text-1.3.2a-patched/table.C 2005-05-13 22:19:59.871136320 +0200
+@@ -175,7 +175,7 @@
+ - (*number_of_columns_return - 1) * (column_spacing + 0),
+ Area::LEFT // Yields better results than "p->halign"!
+ ));
+- p->width = tmp.get() ? tmp->width() : 0;
++ p->width = tmp.get() ? tmp->utf_width() : 0;
+ }
+ p->minimized = false;
+
+@@ -308,7 +308,7 @@
+ left_of_column + old_column_width - 1,
+ Area::LEFT // Yields better results than "lc.halign"!
+ ));
+- w = tmp->width();
++ w = tmp->utf_width();
+ if (w >= left_of_column + old_column_width) lc.minimized = true;
+ }
+ if (w > left_of_column + new_column_width) {