Backport: Make sure control chars from HTTP header don't end up in HTML, CSV, JSON

This is for 3.0. For 3.1dev, see #2332.

This PR addresses bug #2330 by implementing a function which removes control characters from the HTML, CSV and JSON file output formats.
At every call site, the string is first checked for control chars before the function is invoked, in the hope that this saves a few milliseconds.

The function uses tr to delete the control characters; LF is exempt from deletion and stays in the output.
It doesn't filter the terminal output or the log file output yet. It does provide a function for that (sanitize_termout), but that function is not being called yet.
This commit is contained in:
Dirk Wetter 2023-03-12 17:40:02 +01:00
parent 34453664a1
commit 828af39053
3 changed files with 46 additions and 14 deletions

View File

@ -13,4 +13,4 @@ jobs:
- uses: codespell-project/actions-codespell@master
skip: ca_hashes.txt,tls_data.txt,*.pem,OPENSSL-LICENSE.txt,.git
ignore_words_list: borken,gost,ciph,ba,bloc,isnt,chello,fo,alle,nmake,aNULL
ignore_words_list: borken,gost,ciph,ba,bloc,isnt,chello,fo,alle,nmake,anull

View File

@ -13,25 +13,26 @@ my $prg="./";
my $uri="";
my $out="";
my $html="";
my $htmlfile="tmp.html";
my $debughtml="";
my $edited_html="";
my $check2run="--ip=one --color 0 --htmlfile tmp.html";
my $check2run="--ip=one --color 0 --htmlfile $htmlfile";
my $diff="";
die "Unable to open $prg" unless -f $prg;
printf "\n%s\n", "Doing HTML output checks";
unlink 'tmp.html';
unlink $htmlfile;
printf "%s\n", " .. running $prg against \"$uri\" to create HTML and terminal outputs (may take ~2 minutes)";
# specify a TERM_WIDTH so that the two calls to $prg don't create HTML files with different values of TERM_WIDTH
$out = `TERM_WIDTH=120 $prg $check2run $uri`;
$html = `cat tmp.html`;
$html = `cat $htmlfile`;
# $edited_html will contain the HTML with formatting information removed in order to compare against terminal output
# Start by removing the HTML header.
$edited_html = `tail -n +11 tmp.html`;
unlink 'tmp.html';
$edited_html = `tail -n +11 $htmlfile`;
unlink $htmlfile;
# Remove the HTML footer
$edited_html =~ s/\n\<\/pre\>\n\<\/body\>\n\<\/html\>//;
@ -49,12 +50,13 @@ $edited_html =~ s/&apos;/'/g;
cmp_ok($edited_html, "eq", $out, "HTML file matches terminal output");
printf "\n%s\n", " .. running again $prg against \"$uri\", now with --debug 4 to create HTML output (may take another ~2 minutes)";
# Redirect stderr to /dev/null in order to avoid some unexplained "date: invalid date" error messages
$out = `TERM_WIDTH=120 $prg $check2run --debug 4 $uri 2> /dev/null`;
$debughtml = `cat tmp.html`;
unlink 'tmp.html';
$debughtml = `cat $htmlfile`;
unlink $htmlfile;
# Remove date information from the Start and Done banners in the two HTML files, since they were created at different times
$html =~ s/Start 2[0-9][0-9][0-9]-[0-3][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]/Start XXXX-XX-XX XX:XX:XX/;
@ -70,6 +72,7 @@ $debughtml =~ s/HTTP clock skew \+?-?[0-9]* /HTTP clock skew
$debughtml =~ s/ Pre-test: .*\n//g;
$debughtml =~ s/.*OK: below 825 days.*\n//g;
$debughtml =~ s/.*DEBUG:.*\n//g;
$debughtml =~ s/No engine or GOST support via engine with your.*\n//g;
cmp_ok($debughtml, "eq", $html, "HTML file created with --debug 4 matches HTML file created without --debug");

View File

@ -490,7 +490,6 @@ show_finding() {
local output
"$do_html" || return 0
#sed -e 's/\&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' -e 's/"/\&quot;/g' -e "s/'/\&apos;/g" <<< "$1"
@ -501,8 +500,26 @@ html_reserved(){
html_out() {
local outstr="$1"
"$do_html" || return 0
[[ -n "$HTMLFILE" ]] && [[ ! -d "$HTMLFILE" ]] && printf -- "%b" "$1" >> "$HTMLFILE"
if [[ -n "$HTMLFILE" ]] && [[ ! -d "$HTMLFILE" ]]; then
if [[ "$outstr" =~ [[:cntrl:]] ]]; then
outstr="$(sanitize_fileout "$outstr")"
printf -- "%b" "$outstr" >> "$HTMLFILE"
# Removes non-printable chars in CSV, JSON, HTML, see #2330
sanitize_fileout() {
tr -d '\000-\011\013-\037' <<< "$1"
# Removes non-printable chars in terminal output (log files)
# We need to keep the color ANSI escape code x1b, o33, see #2330
sanitize_termout() {
tr -d '\000-\011\013-\032\034-\037' <<< "$1"
# This is intentionally the same.
@ -806,6 +823,9 @@ fileout_json_print_parameter() {
spaces=" " || \
spaces=" "
if [[ -n "$value" ]] || [[ "$parameter" == finding ]]; then
if [[ "$value" =~ [[:cntrl:]] ]]; then
value="$(sanitize_fileout "$value")"
printf -- "%b%b%b%b" "$spaces" "\"$parameter\"" "$filler" ": \"$value\"" >> "$JSONFILE"
"$not_last" && printf ",\n" >> "$JSONFILE"
@ -931,12 +951,19 @@ fileout_insert_warning() {
# args: "id" "fqdn/ip" "port" "severity" "finding" "cve" "cwe" "hint"
fileout_csv_finding() {
local finding="$5"
if [[ "$finding" =~ [[:cntrl:]] ]]; then
finding="$(sanitize_fileout "$finding")"
safe_echo "\"$1\"," >> "$CSVFILE"
safe_echo "\"$2\"," >> "$CSVFILE"
safe_echo "\"$3\"," >> "$CSVFILE"
safe_echo "\"$4\"," >> "$CSVFILE"
safe_echo "\"$5\"," >> "$CSVFILE"
safe_echo "\"$finding\"," >> "$CSVFILE"
safe_echo "\"$6\"," >> "$CSVFILE"
if "$GIVE_HINTS"; then
safe_echo "\"$7\"," >> "$CSVFILE"
@ -2855,16 +2882,18 @@ run_server_banner() {
grep -ai '^Server' $HEADERFILE >$TMPFILE
if [[ $? -eq 0 ]]; then
serverbanner=$(sed -e 's/^Server: //' -e 's/^server: //' $TMPFILE)
if [[ "$serverbanner" == $'\n' ]] || [[ "$serverbanner" == $'\r' ]] || [[ "$serverbanner" == $'\n\r' ]] || [[ -z "$serverbanner" ]]; then
if [[ -z "$serverbanner" ]]; then
outln "exists but empty string"
fileout "$jsonID" "INFO" "Server banner is empty"
emphasize_stuff_in_headers "$serverbanner"
fileout "$jsonID" "INFO" "$serverbanner"
if [[ "$serverbanner" == *Microsoft-IIS/6.* ]] && [[ $OSSL_VER == 1.0.2* ]]; then
prln_warning " It's recommended to run another test w/ OpenSSL 1.0.1 !"
prln_warning " It's recommended to run another test w/ OpenSSL >= 1.0.1 !"
# see
fileout "${jsonID}" "WARN" "IIS6_openssl_mismatch: Recommended to rerun this test w/ OpenSSL 1.0.1. See"
fileout "${jsonID}" "WARN" "IIS6_openssl_mismatch: Recommended to rerun this test w/ OpenSSL >= 1.0.1. See"