factordb_old.pl 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. #!/usr/bin/perl
  2. # Daniel "Trizen" Șuteu
  3. # Date: 28 August 2019
  4. # https://github.com/trizen
  5. # Automatically factorize composite numbers without known factors from factordb.com and report the factorizations.
  6. # For small numbers, YAFU program is used, which fully factorizes the numbers.
  7. # For large numbers, the `ecm_factor` function from Math::Prime::Util::GMP is used, which finds only one factor using the ECM method.
  8. # The script also has support for `ecmpi`:
  9. # https://gite.lirmm.fr/bouvier/ecmpi
  10. # Usage:
  11. # perl factordb.pl # factorize the smallest numbers without known factors
  12. # perl factordb.pl -d n # factorize only numbers with at least n digits
  13. # perl factordb.pl -d n -s k # factorize only numbers with at least n digits, skipping the first k numbers
  14. use 5.020;
  15. use warnings;
  16. use File::Temp qw(tempdir);
  17. use File::Spec::Functions qw(catfile);
  18. use Math::GMPz;
  19. use ntheory qw(vecmin);
  20. use experimental qw(signatures);
  21. use Math::Prime::Util::GMP qw(
  22. lucas_sequence gcd pminus1_factor is_prime
  23. pplus1_factor holf_factor logint vecprod
  24. );
  25. use WWW::Mechanize;
  26. use Getopt::Std qw(getopts);
  27. use List::Util qw(shuffle uniq);
  28. use constant {
  29. USE_TOR_PROXY => 1, # true to use the Tor proxy to connect to factorDB (127.0.0.1:9050)
  30. PREFER_YAFU_ECM => 0, # true to prefer YAFU's ECM implementation
  31. PREFER_ECMPI => 1, # true to prefer ecmpi, which uses GMP-ECM (this is faster)
  32. ECM_MIN => 65, # numbers > 10^ECM_MIN will be factorized with ECM
  33. SIQS_MAX => 70, # numbers < 10^SIQS_MAX will be factorized with SIQS, if ECM fails
  34. ECM_TIMEOUT => 10, # number of seconds allocated for ECM
  35. YAFU_TIMEOUT => 60, # number of seconds allocated for YAFU on small numbers < 10^ECM_MIN
  36. };
  37. local $SIG{INT} = sub {
  38. say ":: Exiting...";
  39. exit 1;
  40. };
  41. chdir(my $cwd = tempdir(CLEANUP => 1));
  42. getopts('d:s:', \my %opt);
  43. sub fermat_find_factor ($n, $max_iter) {
  44. my $p = Math::GMPz::Rmpz_init(); # p = floor(sqrt(n))
  45. my $q = Math::GMPz::Rmpz_init(); # q = p^2 - n
  46. Math::GMPz::Rmpz_sqrtrem($p, $q, $n);
  47. Math::GMPz::Rmpz_neg($q, $q);
  48. for (my $j = 1 ; $j <= $max_iter ; ++$j) {
  49. Math::GMPz::Rmpz_addmul_ui($q, $p, 2);
  50. Math::GMPz::Rmpz_add_ui($q, $q, 1);
  51. Math::GMPz::Rmpz_add_ui($p, $p, 1);
  52. if (Math::GMPz::Rmpz_perfect_square_p($q)) {
  53. Math::GMPz::Rmpz_sqrt($q, $q);
  54. my $r = Math::GMPz::Rmpz_init();
  55. Math::GMPz::Rmpz_sub($r, $p, $q);
  56. return $r;
  57. }
  58. }
  59. return;
  60. }
  61. sub fast_fibonacci_check ($n, $upto) {
  62. foreach my $k (2 .. $upto) {
  63. foreach my $P (3, 4) {
  64. my ($U, $V) = map { Math::GMPz::Rmpz_init_set_str($_, 10) } lucas_sequence($n, $P, 1, $k);
  65. foreach my $f (sub { gcd($U, $n) }, sub { gcd($U - 1, $n) }, sub { gcd($V, $n) }, sub { gcd($V - 1, $n) }, sub { gcd($V - 2, $n) },) {
  66. my $g = Math::GMPz->new($f->());
  67. return $g if ($g > 1 and $g < $n);
  68. }
  69. }
  70. }
  71. return;
  72. }
  73. sub fast_power_check ($n, $upto) {
  74. state $t = Math::GMPz::Rmpz_init_nobless();
  75. state $g = Math::GMPz::Rmpz_init_nobless();
  76. my $base_limit = vecmin(logint($n, 2), 150);
  77. foreach my $base (2 .. $base_limit) {
  78. Math::GMPz::Rmpz_set_ui($t, $base);
  79. foreach my $exp (2 .. $upto) {
  80. Math::GMPz::Rmpz_mul_ui($t, $t, $base);
  81. Math::GMPz::Rmpz_sub_ui($g, $t, 1);
  82. Math::GMPz::Rmpz_gcd($g, $g, $n);
  83. if (Math::GMPz::Rmpz_cmp_ui($g, 1) > 0 and Math::GMPz::Rmpz_cmp($g, $n) < 0) {
  84. return Math::GMPz::Rmpz_init_set($g);
  85. }
  86. Math::GMPz::Rmpz_add_ui($g, $t, 1);
  87. Math::GMPz::Rmpz_gcd($g, $g, $n);
  88. if (Math::GMPz::Rmpz_cmp_ui($g, 1) > 0 and Math::GMPz::Rmpz_cmp($g, $n) < 0) {
  89. return Math::GMPz::Rmpz_init_set($g);
  90. }
  91. }
  92. }
  93. return undef;
  94. }
  95. sub special_form_factor ($n) {
  96. my $len = length("$n");
  97. if (ref($n) ne 'Math::GMPz') {
  98. $n = Math::GMPz->new("$n");
  99. }
  100. my @factors;
  101. if (defined(my $p = fermat_find_factor($n, 1e4))) {
  102. say ":: Fermat difference of squares...";
  103. return $p;
  104. }
  105. if (defined(my $p = fast_power_check($n, 500))) {
  106. say ":: Power fast-check found factor...";
  107. return $p;
  108. }
  109. if (defined(my $p = fast_fibonacci_check($n, 5000))) {
  110. say ":: Fibonacci fast-check found factor...";
  111. return $p;
  112. }
  113. my $pm1_limit = 1e5;
  114. my $pp1_limit = 1e5;
  115. my $holf_limit = 1e5;
  116. @factors = holf_factor($n, $holf_limit);
  117. if (@factors > 1) {
  118. say ":: HOLF found factor...";
  119. return @factors;
  120. }
  121. @factors = pplus1_factor($n, $pp1_limit);
  122. if (@factors > 1) {
  123. say ":: p+1 found factor...";
  124. return @factors;
  125. }
  126. @factors = pminus1_factor($n, $pm1_limit);
  127. if (@factors > 1) {
  128. say ":: p-1 found factor...";
  129. return @factors;
  130. }
  131. return;
  132. }
  133. sub validate_factors ($n, @factors) {
  134. if (ref($n) ne 'Math::GMPz') {
  135. $n = Math::GMPz->new("$n");
  136. }
  137. @factors || return;
  138. @factors = map { (ref($_) eq 'Math::GMPz') ? $_ : Math::GMPz::Rmpz_init_set_str("$_", 10) } @factors;
  139. @factors = grep { ($_ > 1) and ($_ < $n) } @factors;
  140. @factors || return;
  141. @factors = grep { $n % $_ == 0 } @factors;
  142. if (@factors) {
  143. push @factors, ($n / Math::GMPz->new(vecprod(@factors)));
  144. }
  145. @factors = grep { ($_ > 1) and ($_ < $n) } @factors;
  146. @factors = sort { $a <=> $b } @factors;
  147. return uniq(@factors);
  148. }
  149. sub parse_yafu_output ($n, $output) {
  150. my @factors;
  151. while ($output =~ /^[CP]\d+\s*=\s*(\d+)/mg) {
  152. push @factors, Math::GMPz->new($1);
  153. }
  154. return validate_factors($n, @factors);
  155. }
  156. sub parse_ecmpi_output ($n, $output) {
  157. my @factors;
  158. while ($output =~ /found factor (\d+)/g) {
  159. push @factors, Math::GMPz->new($1);
  160. }
  161. return validate_factors($n, @factors);
  162. }
  163. sub parse_mpu_output ($n, $output) {
  164. my @factors;
  165. while ($output =~ /^(\d+)/mg) {
  166. push @factors, Math::GMPz->new($1);
  167. }
  168. return validate_factors($n, @factors);
  169. }
  170. sub trial_division ($n, $k = 1e5) { # trial division, using cached primorials + GCD
  171. state %cache;
  172. # Clear the cache when there are too many values cached
  173. if (scalar(keys(%cache)) > 100) {
  174. Math::GMPz::Rmpz_clear($_) for values(%cache);
  175. undef %cache;
  176. }
  177. my $B = (
  178. $cache{$k} //= do {
  179. my $t = Math::GMPz::Rmpz_init_nobless();
  180. Math::GMPz::Rmpz_primorial_ui($t, $k);
  181. $t;
  182. }
  183. );
  184. state $g = Math::GMPz::Rmpz_init_nobless();
  185. state $t = Math::GMPz::Rmpz_init_nobless();
  186. Math::GMPz::Rmpz_gcd($g, $n, $B);
  187. if (Math::GMPz::Rmpz_cmp_ui($g, 1) > 0) {
  188. Math::GMPz::Rmpz_set($t, $n);
  189. my %factor;
  190. foreach my $f (Math::Prime::Util::GMP::factor(Math::GMPz::Rmpz_get_str($g, 10))) {
  191. Math::GMPz::Rmpz_set_ui($g, $f);
  192. $factor{$f} = Math::GMPz::Rmpz_remove($t, $t, $g);
  193. }
  194. my @factors = keys %factor;
  195. if (Math::GMPz::Rmpz_cmp_ui($t, 1) > 0) {
  196. push @factors, Math::GMPz::Rmpz_init_set($t);
  197. }
  198. return validate_factors($n, @factors);
  199. }
  200. return;
  201. }
  202. sub ecm_one_factor ($n) {
  203. my @factors;
  204. eval {
  205. local $SIG{ALRM} = sub { die "alarm\n" };
  206. alarm ECM_TIMEOUT;
  207. if (PREFER_YAFU_ECM) {
  208. say ":: Trying YAFU's ECM...";
  209. my $curves = 2 * ECM_TIMEOUT;
  210. my $output = qx(yafu 'ecm($n, $curves)' -B1ecm 60000);
  211. push @factors, parse_yafu_output($n, $output);
  212. }
  213. if (PREFER_ECMPI) {
  214. say ":: Trying ECMPI with GMP-ECM...";
  215. my $curves = 2 * ECM_TIMEOUT;
  216. my $output = qx(ecmpi -N $n -B1 180000 -nb $curves);
  217. push @factors, parse_ecmpi_output($n, $output);
  218. }
  219. if (!@factors) {
  220. say ":: Trying MPU's ECM...";
  221. my $output = `$^X -MMath::Prime::Util::GMP=ecm_factor -E 'say for ecm_factor("$n")'`;
  222. ## my $output = `$^X -MMath::Prime::Util=factor -E 'say for factor("$n")'`;
  223. push @factors, parse_mpu_output($n, $output);
  224. }
  225. alarm 0;
  226. };
  227. if ($@ eq "alarm\n") {
  228. `pkill -P $$`;
  229. if (length("$n") < 1000) {
  230. say ":: Trying to find factors of special form...";
  231. push @factors, special_form_factor($n);
  232. }
  233. }
  234. if (@factors and $factors[0] <= 1e8 and !PREFER_ECMPI and !is_prime($factors[-1])) {
  235. pop(@factors) if (scalar(@factors) >= 2);
  236. say ":: Trial division up to 10^8...";
  237. push @factors, trial_division($n / Math::GMPz->new(vecprod(@factors)), 1e8);
  238. push @factors, $n / Math::GMPz->new(vecprod(@factors));
  239. }
  240. if (!@factors and length("$n") <= SIQS_MAX) {
  241. say ":: Trying YAFU SIQS...";
  242. my $output = qx(yafu 'siqs($n)');
  243. push @factors, parse_yafu_output($n, $output);
  244. }
  245. return validate_factors($n, @factors);
  246. }
  247. sub yafu_factor ($n) {
  248. $n = Math::GMPz->new($n); # validate the number
  249. if (length($n) >= ECM_MIN) {
  250. return ecm_one_factor($n);
  251. }
  252. my @factors;
  253. eval {
  254. local $SIG{ALRM} = sub { die "alarm\n" };
  255. alarm YAFU_TIMEOUT;
  256. my $output = qx(yafu 'factor($n)');
  257. @factors = parse_yafu_output($n, $output);
  258. alarm 0;
  259. };
  260. if ($@ eq "alarm\n") {
  261. `pkill -P $$`;
  262. say ":: YAFU timeout...";
  263. unlink(catfile($cwd, 'siqs.dat'));
  264. return ecm_one_factor($n);
  265. }
  266. if (!@factors) {
  267. say ":: YAFU failed...";
  268. unlink(catfile($cwd, 'siqs.dat'));
  269. return ecm_one_factor($n);
  270. }
  271. return @factors;
  272. }
  273. my $mech = WWW::Mechanize->new(
  274. autocheck => 1,
  275. show_progress => 0,
  276. stack_depth => 10,
  277. agent => "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
  278. );
  279. {
  280. state $accepted_encodings = HTTP::Message::decodable();
  281. $mech->default_header('Accept-Encoding' => $accepted_encodings);
  282. };
  283. {
  284. require LWP::ConnCache;
  285. my $cache = LWP::ConnCache->new;
  286. $cache->total_capacity(undef); # no limit
  287. $mech->conn_cache($cache);
  288. };
  289. if (USE_TOR_PROXY) {
  290. $mech->proxy(['http', 'https'], "socks://127.0.0.1:9050");
  291. }
  292. my $start = 0;
  293. my $digits = 0;
  294. my $perpage = 100;
  295. if (defined($opt{s}) and $opt{s} > 0) {
  296. $start = $opt{s};
  297. }
  298. if (defined($opt{d}) and $opt{d} > 0) {
  299. $digits = $opt{d};
  300. }
  301. my $failure = 0;
  302. my $success = 0;
  303. my $time = time;
  304. my $old_time = $time;
  305. my $interval = 60; # seconds
  306. my $factor_size = 0; # average factor size
  307. sub factordb {
  308. my $main_url = "http://factordb.com/listtype.php?t=3";
  309. $main_url .= "&start=$start&perpage=$perpage";
  310. say ":: Start value = $start (page ", 1 + int($start / $perpage), ")";
  311. if ($digits > 0) {
  312. $main_url .= "&mindig=$opt{d}";
  313. }
  314. my $resp = $mech->get($main_url);
  315. my @links = $mech->find_all_links(url_regex => qr/index\.php\?id=\d+/);
  316. if ($digits > 0) {
  317. @links = shuffle(@links);
  318. }
  319. foreach my $link (@links) {
  320. my $expr = $link->text;
  321. my $digits = 0;
  322. next if $expr =~ /##/; # ignore numbers that are formed from primorials
  323. ##next if $expr !~ /\^/; # ignore numbers that are not formed from powers
  324. ##next if $expr !~ /[LI]/; # ignore numbers that are not formed from Fibonacci or Lucas numbers
  325. my $resp = $mech->get($link);
  326. if ($resp->decoded_content =~ m{<font color="#002099">(.*?)</font></a><sub>&lt;(\d+)&gt;</sub>}) {
  327. $expr = $1;
  328. $digits = $2;
  329. }
  330. say "\n:: Factoring: $expr ($digits digits)";
  331. if ($resp->decoded_content =~ m{<td>FF</td>}) {
  332. say ":: Already factorized...";
  333. next;
  334. }
  335. my $reget = 0;
  336. my $number = $expr;
  337. if ($number !~ /^[0-9]+\z/) {
  338. $reget = 1;
  339. $resp = $mech->follow_link(text_regex => qr/\(show\)/);
  340. if ($resp->decoded_content =~ m{<td align="center">Number</td>\s*<td align="center">(.*?)</td>}s) {
  341. $number = $1;
  342. $number =~ s/<(.*?)>//g;
  343. $number = join('', split(' ', $number));
  344. }
  345. else {
  346. warn ":: Can't extract digits of <<$number>>\n";
  347. next;
  348. }
  349. }
  350. my @factors = yafu_factor($number);
  351. if ((time - $old_time) >= $interval) {
  352. my $total_time = (time - $time) || 1;
  353. my $predicted_success = int($success / $total_time * 3600);
  354. my $predicted_total = int(($success + $failure) / $total_time * 3600);
  355. if ($predicted_total > 0) {
  356. printf(":: Predicted success: %.2f%% (%d out of %d) per hour...\n",
  357. 100 * $predicted_success / $predicted_total,
  358. $predicted_success, $predicted_total);
  359. }
  360. $old_time = time;
  361. }
  362. if (!@factors) {
  363. say ":: Failed to factorize...";
  364. $failure += 1;
  365. next;
  366. }
  367. $success += 1;
  368. $factor_size += length("$factors[0]");
  369. say ":: Factors: @factors";
  370. printf(":: Average factor size found: %d digits...\n", int($factor_size / $success));
  371. printf(":: Factorization success: %.2f%% (%d out of %d)\n", 100 * $success / ($failure + $success), $success, $failure + $success);
  372. if ($reget) {
  373. for my $i (1 .. 5) {
  374. say ":: Retrying to connect ($i out of 5)..." if ($i > 1);
  375. $resp = eval { $mech->get($link) } // next;
  376. $resp->status_line =~ /Server closed connection/ or last;
  377. }
  378. }
  379. if ($resp->decoded_content =~ m{<td>FF</td>}) {
  380. say ":: Already factorized...";
  381. next;
  382. }
  383. say ":: Sending factors...";
  384. if (scalar(@factors) >= 2) {
  385. pop @factors;
  386. }
  387. $resp = $mech->submit_form(
  388. form_number => 1,
  389. fields => {
  390. 'report' => join(' ', @factors),
  391. 'format' => 7,
  392. }
  393. );
  394. if ($resp->decoded_content =~ /Thank you/i) {
  395. say ":: New prime factors submitted!";
  396. }
  397. }
  398. }
  399. while (1) {
  400. factordb();
  401. $start += $perpage;
  402. say "\n:: Getting the next page of results...";
  403. }