From 6ab9290eca1ec6f15d874622dca2a26b05fb8aa6 Mon Sep 17 00:00:00 2001 From: rudolfkoenig <> Date: Mon, 14 Feb 2022 20:39:19 +0000 Subject: [PATCH] fhem.pl: experimental encoding unicode (Forum #126088) git-svn-id: https://svn.fhem.de/fhem/trunk@25679 2b470e98-0d58-463d-a4d8-8e2adae1ed80 --- fhem/FHEM/01_FHEMWEB.pm | 22 ++++++++++++----- fhem/FHEM/98_telnet.pm | 6 ++++- fhem/FHEM/HttpUtils.pm | 3 +++ fhem/fhem.pl | 55 ++++++++++++++++++++++++++++------------- 4 files changed, 62 insertions(+), 24 deletions(-) diff --git a/fhem/FHEM/01_FHEMWEB.pm b/fhem/FHEM/01_FHEMWEB.pm index 4a6fa1d0b..638b52319 100644 --- a/fhem/FHEM/01_FHEMWEB.pm +++ b/fhem/FHEM/01_FHEMWEB.pm @@ -358,6 +358,8 @@ FW_Read($$) # Data from HTTP Client my $buf; my $ret = sysread($c, $buf, 1024); + $buf = Encode::decode($hash->{encoding}, $buf) + if($unicodeEncoding && $hash->{encoding}); if(!defined($ret) && $! == EWOULDBLOCK ){ $hash->{wantWrite} = 1 @@ -461,6 +463,10 @@ FW_Read($$) $k =~ s/(\w+)/\u$1/g; #39203 $k=>(defined($v) ? $v : 1); } @FW_httpheader; + if(!$hash->{encoding}) { + my $ct = $FW_httpheader{"Content-Type"}; + $hash->{encoding} = ($ct && $ct =~ m/charset\s*=\s*(\S*)/i ? $1 : $FW_encoding); + } delete($hash->{HDR}); my @origin = grep /Origin/i, @FW_httpheader; @@ -612,12 +618,14 @@ FW_finishRead($$$) my $name = $hash->{NAME}; my $compressed = ""; + my $encoded = ""; if($FW_RETTYPE =~ m/(text|xml|json|svg|script)/i && ($FW_httpheader{"Accept-Encoding"} && $FW_httpheader{"Accept-Encoding"} =~ m/gzip/) && $FW_use{zlib}) { - utf8::encode($FW_RET) - if(utf8::is_utf8($FW_RET) && $FW_RET =~ m/[^\x00-\xFF]/ ); + $FW_RET = Encode::encode($hash->{encoding}, $FW_RET) + if($unicodeEncoding || (utf8::is_utf8($FW_RET) && $FW_RET =~ m/[^\x00-\xFF]/)); + $encoded = 1; eval { $FW_RET = Compress::Zlib::memGzip($FW_RET); }; if($@) { Log 1, "memGzip: $@"; $FW_RET=""; #Forum #29939 @@ -637,8 +645,8 @@ FW_finishRead($$$) "HTTP/1.1 $FW_httpRetCode\r\n" . "Content-Length: $length\r\n" . $expires . $compressed . $FW_headerlines . - "Content-Type: $FW_RETTYPE\r\n\r\n" . - $FW_RET, "FW_closeConn", 1) ){ + "Content-Type: text/html; charset=$FW_RETTYPE\r\n\r\n" . + $FW_RET, "FW_closeConn", 1, $encoded) ){ Log3 $name, 4, "Closing connection $name due to full buffer in FW_Read" if(!$hash->{isChild}); FW_closeConn($hash); @@ -713,9 +721,11 @@ FW_initInform($$) sub FW_addToWritebuffer($$@) { - my ($hash, $txt, $callback, $nolimit) = @_; + my ($hash, $txt, $callback, $nolimit, $encoded) = @_; - utf8::encode($txt) if(utf8::is_utf8($txt) && $txt =~ m/[^\x00-\xFF]/ ); + $txt = Encode::encode($hash->{encoding}, $txt) + if(!$encoded && ($unicodeEncoding || + (utf8::is_utf8($txt) && $txt =~ m/[^\x00-\xFF]/))); if( $hash->{websocket} ) { my $len = length($txt); if( $len < 126 ) { diff --git a/fhem/FHEM/98_telnet.pm b/fhem/FHEM/98_telnet.pm index 084bf4fda..b9ed410d8 100644 --- a/fhem/FHEM/98_telnet.pm +++ b/fhem/FHEM/98_telnet.pm @@ -193,6 +193,7 @@ telnet_Read($) my $buf; my $ret = sysread($hash->{CD}, $buf, 256); + if(!defined($ret) || $ret <= 0) { if($hash->{isClient}) { telnet_ClientDisconnect($hash, 0); @@ -217,6 +218,8 @@ telnet_Read($) syswrite($hash->{CD}, sprintf("%c%c%c", 0xff, 0xfc, ord($1))) } } + + $buf = Encode::decode('UTF-8', $buf) if($unicodeEncoding); $hash->{BUF} .= $buf; my @ret; my $gotCmd; @@ -300,7 +303,8 @@ telnet_Output($$$) $ret = "$ret\n" if(!$hash->{showPrompt}); } for(;;) { - utf8::encode($ret) if(utf8::is_utf8($ret) && $ret =~ m/[^\x00-\xFF]/); + $ret = Encode::encode('UTF-8', $ret) if($unicodeEncoding || + utf8::is_utf8($ret) && $ret =~ m/[^\x00-\xFF]/); my $l = syswrite($hash->{CD}, $ret); last if(!$l || $l == length($ret)); $ret = substr($ret, $l); diff --git a/fhem/FHEM/HttpUtils.pm b/fhem/FHEM/HttpUtils.pm index 0001a5e4d..ae53b9ff8 100644 --- a/fhem/FHEM/HttpUtils.pm +++ b/fhem/FHEM/HttpUtils.pm @@ -976,6 +976,9 @@ HttpUtils_ParseAnswer($) return ($@, $ret) if($@); } } + my $encoding = ($hash->{httpheader} =~ m/^Content-Type.*charset=(\S*)/im ? + $1 : 'UTF-8'); + $ret = Encode::decode($encoding, $ret) if($unicodeEncoding); # Debug Log3 $hash, $hash->{loglevel}+1, diff --git a/fhem/fhem.pl b/fhem/fhem.pl index 96415a547..05539057d 100755 --- a/fhem/fhem.pl +++ b/fhem/fhem.pl @@ -31,6 +31,7 @@ use Time::HiRes qw(gettimeofday); use Scalar::Util qw(looks_like_number); use POSIX; use File::Copy qw(copy); +use Encode; ################################################## # Forward declarations @@ -235,6 +236,7 @@ use vars qw($addTimerStacktrace);# set to 1 by fhemdebug use vars qw($auth_refresh); use vars qw($cmdFromAnalyze); # used by the warnings-sub use vars qw($devcount); # Maximum device number, used for storing +use vars qw($unicodeEncoding); # internal encoding is unicode (wide character) use vars qw($featurelevel); use vars qw($fhemForked); # 1 in a fhemFork()'ed process, else undef use vars qw($fhemTestFile); # file to include if -t is specified @@ -245,6 +247,7 @@ use vars qw($internal_data); # FileLog/DbLog -> SVG data transport use vars qw($lastDefChange); # number of last def/attr change use vars qw($lastWarningMsg); # set by the warnings-sub use vars qw($nextat); # Time when next timer will be triggered. +use vars qw($numCPUs); # Number of CPUs on Linux, else 1 use vars qw($reread_active); use vars qw($selectTimestamp); # used to check last select exit timestamp use vars qw($winService); # the Windows Service object @@ -266,7 +269,6 @@ use vars qw(%value); # Current values, see commandref.html use vars qw(@intAtA); # Internal timer array use vars qw(@structChangeHist); # Contains the last 10 structural changes -use vars qw($numCPUs); # Number of CPUs on Linux, else 1 use constant { DAYSECONDS => 86400, @@ -302,6 +304,7 @@ my %sleepers; # list of sleepers my %delayedShutdowns; # definitions needing delayed shutdown my %fuuidHash; # for duplicate checking my $globalUniqueID; # cache it +my $LOG; # Log file handle, formerly LOG my $readytimeout = ($^O eq "MSWin32") ? 0.1 : 5.0; @@ -336,6 +339,7 @@ my @globalAttrList = qw( dnsServer dupTimeout exclude_from_update + encoding:utf8,unicode hideExcludedUpdates:1,0 featurelevel:6.1,6.0,5.9,5.8,5.7,5.6,5.5,99.99 genericDisplayType:switch,outlet,light,blind,speaker,thermostat @@ -1006,7 +1010,7 @@ Log3($$$) } if($logopened) { - print LOG "$tim $loglevel: $text\n"; + print $LOG "$tim $loglevel: $text\n"; } else { print "$tim $loglevel: $text\n"; } @@ -1377,9 +1381,11 @@ CommandInclude($$) my @ret; my $oldcfgfile; - if(!open($fh, $arg)) { + my $type = ($unicodeEncoding ? "< :encoding(UTF-8)" : "<"); + if(!open($fh, $type, $arg)) { return "Can't open $arg: $!"; } + Log 1, "Including $arg"; my @t = localtime(gettimeofday()); my $gcfg = ResolveDateWildcards(AttrVal("global", "configfile", ""), @t); @@ -1426,28 +1432,29 @@ OpenLogfile($) { my $param = shift; - close(LOG); + close($LOG) if($LOG); $logopened=0; $currlogfile = $param; # STDOUT is closed in windows services per default + if(!$winService->{AsAService} && $currlogfile eq "-") { - open LOG, '>&STDOUT' || die "Can't dup stdout: $!"; + open($LOG, '>&STDOUT') || die "Can't dup stdout: $!"; } else { - HandleArchiving($defs{global}) if($defs{global}{currentlogfile}); $defs{global}{currentlogfile} = $param; $defs{global}{logfile} = $attr{global}{logfile}; restoreDir_mkDir($currlogfile=~m,^/,? "":".", $currlogfile, 1); - open(LOG, ">>$currlogfile") || return("Can't open $currlogfile: $!"); + open($LOG, ">>$currlogfile") || return("Can't open $currlogfile: $!"); redirectStdinStdErr(); } - LOG->autoflush(1); + binmode($LOG, ":encoding(UTF-8)") if($unicodeEncoding); + $LOG->autoflush(1); $logopened = 1; - $defs{global}{FD} = LOG->fileno(); + $defs{global}{FD} = $LOG->fileno(); # ?? return undef; } @@ -1609,28 +1616,30 @@ WriteStatefile() my @t = localtime($now); $stateFile = ResolveDateWildcards($stateFile, @t); - if(!open(SFH, ">$stateFile")) { + my $SFH; + if(!open($SFH, ">$stateFile")) { my $msg = "WriteStatefile: Cannot open $stateFile: $!"; Log 1, $msg; return $msg; } + binmode($SFH, ":encoding(UTF-8)") if($unicodeEncoding); my $t = localtime($now); - print SFH "#$t\n"; + print $SFH "#$t\n"; foreach my $d (sort keys %defs) { next if($defs{$d}{TEMPORARY}); if($defs{$d}{VOLATILE}) { my $def = $defs{$d}{DEF}; $def =~ s/;/;;/g; # follow-on-for-timer at - print SFH "define $d $defs{$d}{TYPE} $def\n"; + print $SFH "define $d $defs{$d}{TYPE} $def\n"; } my @arr = GetAllReadings($d); - print SFH join("\n", @arr)."\n" if(@arr); + print $SFH join("\n", @arr)."\n" if(@arr); } - return "$attr{global}{statefile}: $!" if(!close(SFH)); + return "$attr{global}{statefile}: $!" if(!close($SFH)); return ""; } @@ -1721,10 +1730,12 @@ CommandSave($$) $param = $attr{global}{configfile} if(!$param); return "No configfile attribute set and no argument specified" if(!$param); restoreDir_saveFile($restoreDir, $param); - if(!open(SFH, ">$param")) { + my $SFH; + if(!open($SFH, ">$param")) { return "Cannot open $param: $!"; } - my %fh = ("configfile" => *SFH); + binmode($SFH, ":encoding(UTF-8)") if($unicodeEncoding); + my %fh = ("configfile" => $SFH); my %skip; my %devByNr; @@ -1756,6 +1767,7 @@ CommandSave($$) } else { $fh{$cfgfile} = $fh; } + binmode($fh, ":encoding(UTF-8)") if($unicodeEncoding); } next if($skip{$cfgfile}); @@ -1769,7 +1781,7 @@ CommandSave($$) } - print SFH "include $attr{global}{lastinclude}\n" + print $SFH "include $attr{global}{lastinclude}\n" if($attr{global}{lastinclude} && $featurelevel <= 5.6); foreach my $key (keys %fh) { @@ -2869,6 +2881,7 @@ GlobalAttr($$$$) return "The global attribute $name cannot be deleted" if($noDel{$name}); $featurelevel = 6.1 if($name eq "featurelevel"); $haveInet6 = 0 if($name eq "useInet6"); # IPv6 + $unicodeEncoding = undef if($name eq "encoding"); delete($defs{global}{ignoreRegexpObj}) if($name eq "ignoreRegexp"); return undef; } @@ -2877,6 +2890,7 @@ GlobalAttr($$$$) return "$name is readonly, it is set in the FHEM_GLOBALATTR environment" if(defined($ev) && defined($val) && $ev ne $val); + ################ if($name eq "logfile") { my @t = localtime(gettimeofday()); @@ -2887,6 +2901,13 @@ GlobalAttr($$$$) } } + if($name eq "encoding") { # Should be called from fhem.cfg/configDB + return "bad encoding parameter $val, good values are utf8 or unicode" + if($val ne "unicode" && $val ne "utf8"); + $unicodeEncoding = ($val eq "unicode"); + $currlogfile = ""; + } + ################ elsif($name eq "verbose") { if($val =~ m/^[0-5]$/) {