extract-localizable-strings 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. #!/usr/bin/perl -w
  2. # Copyright (C) 2006, 2007, 2009, 2010 Apple Inc. All rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions
  6. # are met:
  7. #
  8. # 1. Redistributions of source code must retain the above copyright
  9. # notice, this list of conditions and the following disclaimer.
  10. # 2. Redistributions in binary form must reproduce the above copyright
  11. # notice, this list of conditions and the following disclaimer in the
  12. # documentation and/or other materials provided with the distribution.
  13. # 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
  14. # its contributors may be used to endorse or promote products derived
  15. # from this software without specific prior written permission.
  16. #
  17. # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
  18. # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  19. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  20. # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
  21. # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  22. # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  23. # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  24. # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26. # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. # This script is like the genstrings tool (minus most of the options) with these differences.
  28. #
  29. # 1) It uses the names UI_STRING and UI_STRING_WITH_KEY for the macros, rather than the macros
  30. # from NSBundle.h, and doesn't support tables (although they would be easy to add).
  31. # 2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings;
  32. # @"" strings only reliably support ASCII since they are decoded based on the system encoding
  33. # at runtime, so give different results on US and Japanese systems for example).
  34. # 3) It looks for strings that are not marked for localization, using both macro names that are
  35. # known to be used for debugging in Intrigue source code and an exceptions file.
  36. # 4) It finds the files to work on rather than taking them as parameters, and also uses a
  37. # hardcoded location for both the output file and the exceptions file.
  38. # It would have been nice to use the project to find the source files, but it's too hard to
  39. # locate source files after parsing a .pbxproj file.
  40. # The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :.
  41. use strict;
  # Forward declaration so the prototype is known at the call sites that
  # appear before the definition.
  sub UnescapeHexSequence($);

  # Tokens that start a "debug" call: quoted strings found inside the
  # parentheses/brackets following one of these are not reported as
  # unlocalized.
  my %isDebugMacro = (
      ASSERT_WITH_MESSAGE => 1,
      LOG_ERROR => 1,
      ERROR => 1,
      NSURL_ERROR => 1,
      FATAL => 1,
      LOG => 1,
      LOG_WARNING => 1,
      UI_STRING_LOCALIZE_LATER => 1,
      UI_STRING_LOCALIZE_LATER_KEY => 1,
      LPCTSTR_UI_STRING_LOCALIZE_LATER => 1,
      UNLOCALIZED_STRING => 1,
      UNLOCALIZED_LPCTSTR => 1,
      dprintf => 1,
      NSException => 1,
      NSLog => 1,
      printf => 1,
  );

  @ARGV >= 2 or die "Usage: extract-localizable-strings <exceptions file> <file to update> [ directory... ]\nDid you mean to run update-webkit-localizable-strings instead?\n";

  # An exceptions file of "-" means "no exceptions file"; in that case
  # unlocalized strings are counted but not individually reported.
  my $exceptionsFile = shift @ARGV;
  -f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n" unless $exceptionsFile eq "-";

  my $fileToUpdate = shift @ARGV;
  -f $fileToUpdate or die "Couldn't find file to update $fileToUpdate\n";

  my $warnAboutUnlocalizedStrings = $exceptionsFile ne "-";

  # Remaining arguments are directories to scan; an argument with a leading
  # "-" names a path to prune from the scan instead.
  my @directories = ();
  my @directoriesToSkip = ();
  for my $dir (@ARGV) {
      if ($dir =~ /^-(.*)$/) {
          push @directoriesToSkip, $1;
      } else {
          push @directories, $dir;
      }
  }
  # Default to the current directory, including when only "-skip" arguments
  # were supplied (which would otherwise run find on an empty path).
  push(@directories, ".") if !@directories;

  my $sawError = 0;
  my $localizedCount = 0;
  my $keyCollisionCount = 0;
  my $notLocalizedCount = 0;
  my $NSLocalizeCount = 0;

  # %exception maps each exception line to the line number where it was
  # declared; %usedException records which exceptions suppressed a report.
  my %exception;
  my %usedException;

  if ($exceptionsFile ne "-") {
      if (open(my $exceptions, "<", $exceptionsFile)) {
          while (my $line = <$exceptions>) {
              chomp $line;
              # Accepted forms: "string", file name, or file name:"string".
              # The dot before the extension is escaped so that it only
              # matches a literal ".".
              if ($line =~ /^"([^\\"]|\\.)*"$/
                  or $line =~ /^[-_\/\w\s.]+\.(h|m|mm|c|cpp)$/
                  or $line =~ /^[-_\/\w\s.]+\.(h|m|mm|c|cpp):"([^\\"]|\\.)*"$/) {
                  if ($exception{$line}) {
                      print "$exceptionsFile:$.:exception for $line appears twice\n";
                      print "$exceptionsFile:$exception{$line}:first appearance\n";
                  } else {
                      $exception{$line} = $.;
                  }
              } else {
                  print "$exceptionsFile:$.:syntax error\n";
              }
          }
          close $exceptions;
      } else {
          # Keep going without exceptions, but say why they are missing.
          warn "Couldn't open exceptions file $exceptionsFile: $!\n";
      }
  }

  # NOTE(review): directory names are interpolated into a shell command
  # inside double quotes; names containing '"', '$' or backticks are not
  # escaped. Left as is to preserve the exact find invocation.
  my $quotedDirectoriesString = '"' . join('" "', @directories) . '"';
  for my $dir (@directoriesToSkip) {
      $quotedDirectoriesString .= ' -path "' . $dir . '" -prune -o';
  }

  my @files = ( split "\n", `find $quotedDirectoriesString \\( -name "*.h" -o -name "*.m" -o -name "*.mm" -o -name "*.c" -o -name "*.cpp" \\)` );
  # Scan every source file found above: tokenize it line by line, collect
  # the arguments of UI_STRING-style macros, and report quoted strings that
  # are neither localized, inside a debug macro, nor covered by an exception.
  for my $file (sort @files) {
      next if $file =~ /\/\w+LocalizableStrings\w*\.h$/ || $file =~ /\/LocalizedStrings\.h$/;

      # Strip a leading "./" only. The dot is escaped so that a
      # one-character directory name such as "a/" is left alone.
      $file =~ s-^\./--;

      # Three-argument open so the file name is never interpreted as a mode
      # or a pipe.
      open(my $source, "<", $file) or die "can't open $file\n";

      my $inComment = 0;
      my $expected = "";
      my $macroLine;
      my $macro;
      my $UIString;
      my $key;
      my $comment;
      my $string;
      my $stringLine;
      my $nestingLevel;
      my $previousToken = "";

      # Consumes the pending quoted string in $string (adjacent literals have
      # already been concatenated). Called when the next non-string token is
      # seen and once more at end of file; this replaces a "goto" into the
      # token loop, a construct deprecated since Perl 5.12.
      my $handlePendingString = sub {
          if ($expected) {
              # Inside a UI_STRING-style macro: the strings are, in order,
              # the UI string, the key (key-taking macros only), the comment.
              if (!defined $UIString) {
                  # FIXME: Validate UTF-8 here?
                  $UIString = $string;
                  $expected = ",";
              } elsif (($macro =~ /(WEB_)?UI_STRING_KEY(_INTERNAL)?$/) and !defined $key) {
                  # FIXME: Validate UTF-8 here?
                  $key = $string;
                  $expected = ",";
              } elsif (!defined $comment) {
                  # FIXME: Validate UTF-8 here?
                  $comment = $string;
                  $expected = ")";
              }
          } else {
              if (defined $nestingLevel) {
                  # In a debug macro, no need to localize.
              } elsif ($previousToken eq "#include" or $previousToken eq "#import") {
                  # File name, no need to localize.
              } elsif ($previousToken eq "extern" and $string eq "C") {
                  # extern "C", no need to localize.
              } elsif ($string eq "") {
                  # Empty string can sometimes be localized, but we need not complain if not.
              } elsif ($exception{$file}) {
                  $usedException{$file} = 1;
              } elsif ($exception{"\"$string\""}) {
                  $usedException{"\"$string\""} = 1;
              } elsif ($exception{"$file:\"$string\""}) {
                  $usedException{"$file:\"$string\""} = 1;
              } else {
                  print "$file:$stringLine:\"$string\" is not marked for localization\n" if $warnAboutUnlocalizedStrings;
                  $notLocalizedCount++;
              }
          }
          $string = undef;
      };

      while (<$source>) {
          chomp;

          # Handle continued multi-line comment.
          if ($inComment) {
              next unless s-.*\*/--;
              $inComment = 0;
          }

          # Handle all the tokens in the line.
          while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
              my $token = $1;

              if ($token eq "\"") {
                  if ($expected and $expected ne "a quoted string") {
                      print "$file:$.:ERROR:found a quoted string but expected $expected\n";
                      $sawError = 1;
                      $expected = "";
                  }
                  # Consume up to the closing quote; adjacent string literals
                  # accumulate into a single pending string.
                  if (s-^(([^\\$token]|\\.)*?)$token--) {
                      if (!defined $string) {
                          $stringLine = $.;
                          $string = $1;
                      } else {
                          $string .= $1;
                      }
                  } else {
                      print "$file:$.:ERROR:mismatched quotes\n";
                      $sawError = 1;
                      $_ = "";
                  }
                  next;
              }

              # Any non-string token ends the pending string literal.
              $handlePendingString->() if defined $string;

              $previousToken = $token;

              if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/ && $token !~ /NSLocalizedFileSizeDescription/) {
                  print "$file:$.:ERROR:found a use of an NSLocalized macro ($token); not supported\n";
                  $nestingLevel = 0 if !defined $nestingLevel;
                  $sawError = 1;
                  $NSLocalizeCount++;
              } elsif ($token eq "/*") {
                  if (!s-^.*?\*/--) {
                      $_ = ""; # If the comment doesn't end, discard the result of the line and set flag
                      $inComment = 1;
                  }
              } elsif ($token eq "//") {
                  $_ = ""; # Discard the rest of the line
              } elsif ($token eq "'") {
                  if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused
                      print "$file:$.:ERROR:mismatched single quote\n";
                      $sawError = 1;
                      $_ = "";
                  }
              } else {
                  if ($expected and $expected ne $token) {
                      print "$file:$.:ERROR:found $token but expected $expected\n";
                      $sawError = 1;
                      $expected = "";
                  }
                  if ($token =~ /(WEB_)?UI_STRING(_KEY)?(_INTERNAL)?$/) {
                      # Start of a localization macro: reset collected arguments.
                      $expected = "(";
                      $macro = $token;
                      $UIString = undef;
                      $key = undef;
                      $comment = undef;
                      $macroLine = $.;
                  } elsif ($token eq "(" or $token eq "[") {
                      ++$nestingLevel if defined $nestingLevel;
                      $expected = "a quoted string" if $expected;
                  } elsif ($token eq ",") {
                      $expected = "a quoted string" if $expected;
                  } elsif ($token eq ")" or $token eq "]") {
                      # Leaving the outermost bracket of a debug macro ends it.
                      $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
                      if ($expected) {
                          # The key defaults to the UI string itself.
                          $key = $UIString if !defined $key;
                          HandleUIString($UIString, $key, $comment, $file, $macroLine);
                          $macro = "";
                          $expected = "";
                          $localizedCount++;
                      }
                  } elsif ($isDebugMacro{$token}) {
                      $nestingLevel = 0 if !defined $nestingLevel;
                  }
              }
          }
      }

      # A string literal may still be pending when the file ends.
      $handlePendingString->() if defined $string;

      if ($expected) {
          print "$file:ERROR:reached end of file but expected $expected\n";
          $sawError = 1;
      }

      close $source;
  }
  # Unescapes C language hexadecimal escape sequences.
  #
  # Takes the text of a C string literal (without the surrounding quotes) and
  # returns it with every \xH... escape replaced by the single byte it
  # denotes; all other text, including other backslash escapes, is copied
  # through unchanged. Returns undef (after printing a diagnostic) if an
  # escape denotes a value of 256 or more. An undefined argument is treated
  # as the empty string.
  sub UnescapeHexSequence($)
  {
      my ($originalStr) = @_;

      my $escapedStr = defined($originalStr) ? $originalStr : "";
      my $unescapedStr = "";

      for (;;) {
          if ($escapedStr =~ s{\A\\x([[:xdigit:]]+)}{}) {
              my $hexDigits = $1;
              my $value = hex($hexDigits);
              if (256 <= $value) {
                  print "Hexadecimal escape sequence out of range: \\x$hexDigits\n";
                  # Callers test the scalar result with defined().
                  return undef;
              }
              # Emit exactly one byte for the numeric value. Packing the
              # digits with "H*" is only right for exactly two digits:
              # "\xA" would become 0xA0 and "\x0041" would become two bytes.
              $unescapedStr .= pack("C", $value);
          } elsif ($escapedStr =~ s{\A(\\.|.)}{}s) {
              # Copy any other backslash escape as a two-character unit so
              # that the "x41" in "\\x41" (an escaped backslash followed by
              # plain text) is not mistaken for a hex escape.
              $unescapedStr .= $1;
          } else {
              return $unescapedStr;
          }
      }
  }
  # Per-key records for every localizable string accepted so far: the string
  # to translate, its translator comment, and where the key was last seen.
  my %stringByKey;
  my %commentByKey;
  my %fileByKey;
  my %lineByKey;

  # Records one localizable string found in a source file.
  #
  # Arguments: the UI string, its key, its comment, and the file and line of
  # the macro. Hex escapes are expanded in the string, key and comment. An
  # out-of-range escape or a U+FFFD code point is reported, sets $sawError,
  # and the entry is dropped. A key already recorded with a different string
  # or a different comment is reported, counted in $keyCollisionCount, and
  # the new entry is dropped. Otherwise the entry is stored in the hashes
  # above.
  sub HandleUIString
  {
      my ($string, $key, $comment, $file, $line) = @_;

      my $bad = 0;

      $string = UnescapeHexSequence($string);
      if (!defined($string)) {
          print "$file:$line:ERROR:string has an illegal hexadecimal escape sequence\n";
          $bad = 1;
      }
      $key = UnescapeHexSequence($key);
      if (!defined($key)) {
          print "$file:$line:ERROR:key has an illegal hexadecimal escape sequence\n";
          $bad = 1;
      }
      $comment = UnescapeHexSequence($comment);
      if (!defined($comment)) {
          print "$file:$line:ERROR:comment has an illegal hexadecimal escape sequence\n";
          $bad = 1;
      }

      # The U+FFFD checks only look at values that survived unescaping, so a
      # failed unescape does not also trigger uninitialized-value warnings.
      if (defined($string) && grep { $_ == 0xFFFD } unpack "U*", $string) {
          print "$file:$line:ERROR:string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
          $bad = 1;
      }
      # Skip the key when it is identical to the string just checked.
      if (defined($key) && (!defined($string) || $string ne $key) && grep { $_ == 0xFFFD } unpack "U*", $key) {
          print "$file:$line:ERROR:key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
          $bad = 1;
      }
      if (defined($comment) && grep { $_ == 0xFFFD } unpack "U*", $comment) {
          print "$file:$line:ERROR:comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
          $bad = 1;
      }

      if ($bad) {
          $sawError = 1;
          return;
      }

      # Test for presence with exists rather than truthiness, so a previously
      # recorded "0" or "" still takes part in collision detection.
      if (exists $stringByKey{$key} && $stringByKey{$key} ne $string) {
          print "$file:$line:encountered the same key, \"$key\", twice, with different strings\n";
          print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
          $keyCollisionCount++;
          return;
      }
      if (exists $commentByKey{$key} && $commentByKey{$key} ne $comment) {
          print "$file:$line:encountered the same key, \"$key\", twice, with different comments\n";
          print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
          $keyCollisionCount++;
          return;
      }

      $fileByKey{$key} = $file;
      $lineByKey{$key} = $line;
      $stringByKey{$key} = $string;
      $commentByKey{$key} = $comment;
  }
  # Print the summary of the scan, then (only if no error was seen) write
  # the collected strings to $fileToUpdate.
  print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount;

  # Exceptions that never suppressed a report are listed with the line of
  # the exceptions file that declared them.
  my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception;
  if (@unusedExceptions) {
      for my $unused (@unusedExceptions) {
          print "$exceptionsFile:$exception{$unused}:exception $unused not used\n";
      }
      print "\n";
  }

  print "$localizedCount localizable strings\n" if $localizedCount;
  print "$keyCollisionCount key collisions\n" if $keyCollisionCount;
  print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount;
  print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount;
  print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions;

  if ($sawError) {
      print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n";
      exit 1;
  }

  # One entry per key, sorted by key: the comment, then "key" = "string";
  my $localizedStrings = "";
  for my $key (sort keys %commentByKey) {
      $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
  }

  # Write out the strings file in UTF-16 with a BOM.
  utf8::decode($localizedStrings) if $^V ge v5.8;

  # Build big-endian 16-bit code units. Code points above U+FFFF do not fit
  # in one unit and are written as a surrogate pair instead of being
  # truncated by pack.
  my @utf16CodeUnits = (0xFEFF);
  for my $codePoint (unpack "U*", $localizedStrings) {
      if ($codePoint > 0xFFFF) {
          my $offset = $codePoint - 0x10000;
          push @utf16CodeUnits, 0xD800 + ($offset >> 10), 0xDC00 + ($offset & 0x3FF);
      } else {
          push @utf16CodeUnits, $codePoint;
      }
  }
  my $output = pack "n*", @utf16CodeUnits;

  if (-e "$fileToUpdate") {
      open(my $stringsFile, ">", "$fileToUpdate") or die "Couldn't open $fileToUpdate for writing: $!\n";
      # The payload is binary; keep any I/O layer from translating bytes
      # that happen to look like line endings.
      binmode $stringsFile;
      print $stringsFile $output;
      # Buffered write errors only surface at close.
      close $stringsFile or die "Couldn't finish writing $fileToUpdate: $!\n";
  } else {
      print "$fileToUpdate does not exist\n";
      exit 1;
  }