#!/usr/bin/env perl
#***************************************************************************
#                                  _   _ ____  _
#  Project                     ___| | | |  _ \| |
#                             / __| | | | |_) | |
#                            | (__| |_| |  _ <| |___
#                             \___|\___/|_| \_\_____|
#
# Copyright (C) Viktor Szakats
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################
| |
| use strict; |
| use warnings; |
| |
# All lists below hold regular expression source strings that are matched
# against the file names printed by 'git ls-files'.

# Files matching any of these are exempt from the "has tab" check.
my @tabs = (
    '^m4/zz40-xc-ovr.m4',
    'Makefile\.(am|example)$',
    '\.sln$',
    '^tests/data/data1706-stdout.txt',
    '^tests/data/test',
);

# Files matching any of these must use CRLF line endings; every other
# non-empty file must use LF.
my @need_crlf = (
    '\.(bat|sln)$',
);

# Files matching any of these are exempt from the consecutive empty line
# check.
my @double_empty_lines = (
    'RELEASE-NOTES',
    '^lib/.+\.(c|h)$',
    '^projects/OS400',
    '^projects/vms',
    '^tests/data/test',
    '\.(m4|py)$',
);

# Byte sequences, written as regex escapes, that are stripped from the
# content before the non-ASCII check, except for files whose name contains
# tests/data.
my @non_ascii_allowed = (
    '\xC3\xB6',  # UTF-8 for https://codepoints.net/U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
);

# The list above as a single string, entries separated by ', '.
my $non_ascii_allowed = join ', ', @non_ascii_allowed;

# Files matching any of these are exempt from the non-ASCII check.
my @non_ascii = qw(
    .github/scripts/pyspelling.words
    .mailmap
    RELEASE-NOTES
    docs/BINDINGS.md
    docs/THANKS
    docs/THANKS-filter
);
| |
# Tell whether a file name matches at least one mask.
#
# Parameters:
#   $filename - the file name to test
#   @masklist - regular expressions (as strings) tried in order
#
# Returns 1 on the first mask that matches, 0 when none does (including
# when the mask list is empty).
sub fn_match {
    my ($filename, @masklist) = @_;

    for my $pattern (@masklist) {
        return 1 if $filename =~ $pattern;
    }

    return 0;
}
| |
# Classify the end-of-line convention used in a file's content.
#
# Parameters:
#   $content - the complete file content as one string
#
# Returns:
#   "cr"   - bare CR characters only, no LF at all
#   "lf"   - LF characters only, no CR at all
#   "crlf" - every CR and every LF is part of a CR LF pair
#   "bin"  - neither CR nor LF present (also the case for empty content)
#   ""     - anything else, i.e. mixed EOL types
sub eol_detect {
    my ($content) = @_;

    # tr/// with an empty replacement list only counts, it does not modify.
    my $cr = ($content =~ tr/\r//);
    my $lf = ($content =~ tr/\n//);

    return "bin" if $cr == 0 && $lf == 0;
    return "cr"  if $lf == 0;
    return "lf"  if $cr == 0;

    # Equal CR and LF counts are not enough: "a\rb\n" has one of each but
    # no CRLF line ending. Require that all of them form actual pairs.
    my $pairs = () = $content =~ /\r\n/g;
    return "crlf" if $cr == $pairs && $lf == $pairs;

    return "";
}
| |
# Main scan: check every file listed by 'git ls-files' for whitespace,
# end-of-line, binary and non-ASCII problems. Each finding is printed as
# "<filename>: <message>". The script exits with status 1 when at least one
# file has findings.

my $issues = 0;  # number of files with at least one finding

# The allowed non-ASCII entries are multi-byte sequences, so match them as
# whole alternatives. A character class would strip their bytes one by one
# and mangle other characters that merely share a byte with them.
my $allowed_seq = join('|', @non_ascii_allowed);
my $allowed_re = @non_ascii_allowed ? qr/(?:$allowed_seq)/ : undef;

open(my $git_ls_files, '-|', 'git ls-files') or die "Failed running git ls-files: $!";
while(my $filename = <$git_ls_files>) {
    chomp $filename;

    open(my $fh, '<', $filename) or die "Cannot open '$filename': $!";
    # Read raw bytes: a CRLF-translating text layer would hide the very
    # line endings this script verifies.
    binmode $fh;
    my $content = do { local $/; <$fh> };
    close $fh;
    # A failed read (e.g. the entry is a directory) yields undef.
    $content = "" if !defined $content;

    my @err = ();

    if(!fn_match($filename, @tabs) &&
       $content =~ /\t/) {
        push @err, "content: has tab";
    }

    my $eol = eol_detect($content);

    if($eol eq "") {
        push @err, "content: has mixed EOL types";
    }

    if($eol ne "crlf" &&
       fn_match($filename, @need_crlf)) {
        push @err, "content: must use CRLF EOL for this file type";
    }

    if($eol ne "lf" && $content ne "" &&
       !fn_match($filename, @need_crlf)) {
        push @err, "content: must use LF EOL for this file type";
    }

    # Trailing whitespace, also when followed by the CR of a CRLF line
    # ending or when it sits on a final line that has no EOL.
    if($content =~ /[ \t]\r?$/m) {
        my $line = 0;
        for my $text (split(/\n/, $content)) {
            $line++;
            if($text =~ /[ \t]\r?\z/) {
                push @err, "line $line: trailing whitespace";
            }
        }
    }

    if($content ne "" &&
       $content !~ /\n\z/) {
        push @err, "content: has no EOL at EOF";
    }

    if($content =~ /\n\n\z/ ||
       $content =~ /\r\n\r\n\z/) {
        push @err, "content: has multiple EOL at EOF";
    }

    if(!fn_match($filename, @double_empty_lines)) {
        if($content =~ /\n\n\n/ ||
           $content =~ /\r\n\r\n\r\n/) {
            my $line = 0;
            my $blank = 0;  # true when the previous line was empty
            for my $text (split(/\n/, $content)) {
                # Drop the CR left over from a CRLF line ending, otherwise
                # an empty line in a CRLF file is never seen as empty.
                $text =~ s/\r\z//;
                $line++;
                if($text eq "") {
                    if($blank) {
                        push @err, sprintf("duplicate empty line @ line %d", $line);
                    }
                    $blank = 1;
                }
                else {
                    $blank = 0;
                }
            }
        }
    }

    # Control characters other than TAB, LF and CR.
    if($content =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/) {
        push @err, "content: has binary contents";
    }

    if($filename !~ /tests\/data/ && defined $allowed_re) {
        # the tests have no allowed UTF bytes
        $content =~ s/$allowed_re//g;
    }

    if(!fn_match($filename, @non_ascii) &&
       $content =~ /[\x80-\xff]/) {
        my $line = 0;
        for my $text (split(/\n/, $content)) {
            $line++;
            # Report the first offending sequence of this very line.
            if($text =~ /([\x80-\xff]+)/) {
                my $non = $1;
                my $hex = join(' ', map { sprintf("%02x", ord($_)) } split(//, $non));
                push @err, "line $line: has non-ASCII: '$non' ($hex)";
            }
        }
    }

    if(@err) {
        $issues++;
        foreach my $err (@err) {
            print "$filename: $err\n";
        }
    }
}
# close() on a pipe returns false when the command exited with a non-zero
# status; without this check a failing git would look like a clean run.
close($git_ls_files)
    or die "Failed running git ls-files: " . ($! ? $! : "exit status " . ($? >> 8));

if($issues) {
    exit 1;
}