regexp_to_strings.pl 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. #!/usr/bin/perl
  2. # Author: Daniel "Trizen" Șuteu
  3. # License: GPLv3
  4. # Date: 21 December 2014
  5. # Website: https://github.com/trizen
  6. # Find the minimum sentence(s) that satisfies a regular expression
  7. # See also: https://www.perlmonks.org/?node_id=284513
  8. # WARNING: this script is just an idea in development
  9. # usage: perl regex_to_strings.pl [regexp]
  10. use utf8;
  11. use 5.010;
  12. use strict;
  13. use warnings;
  14. use Regexp::Parser;
  15. use Data::Dump qw(pp);
  16. binmode(STDOUT, ':utf8');
  17. {
  18. no warnings 'redefine';
  19. *Regexp::Parser::anyof_class::new = sub {
  20. my ($class, $rx, $type, $neg, $how) = @_;
  21. my $self = bless {
  22. rx => $rx,
  23. flags => $rx->{flags}[-1],
  24. family => 'anyof_class',
  25. }, $class;
  26. if (ref $type) {
  27. $self->{data} = $type;
  28. }
  29. else {
  30. $self->{type} = $type;
  31. $self->{data} = 'POSIX';
  32. $self->{neg} = $neg;
  33. $self->{how} = ${$how}; # bug-fix
  34. }
  35. return $self;
  36. };
  37. }
  38. my $regex = shift() // 'ab(c[12]|d(n|p)o)\w{3}[.?!]{4}';
  39. my $parser = Regexp::Parser->new($regex);
  40. my %conv = (
  41. alnum => 'a',
  42. nalnum => '#',
  43. digit => '1',
  44. ndigit => '+',
  45. space => ' ',
  46. nspace => '.',
  47. );
  48. my @stack;
  49. my @strings = [];
  50. my $ref = \@strings;
  51. my $iter = $parser->walker;
  52. my $min = 1;
  53. my $last_depth = 0;
  54. while (my ($node, $depth) = $iter->()) {
  55. my $family = $node->family;
  56. my $type = $node->type;
  57. if ($depth < $last_depth) {
  58. $min = 1;
  59. say "MIN CHANGED";
  60. }
  61. if ($family eq 'quant') {
  62. $min = $node->min;
  63. say "MIN == $min";
  64. }
  65. pp $family, $type, $node->qr; # for debug
  66. if ($type =~ /^(?:close\d|tail)/) {
  67. $ref = pop @stack;
  68. }
  69. elsif (exists $conv{$type}) {
  70. push @{$ref->[-1]}, $conv{$type} x $min;
  71. }
  72. elsif ($family eq 'open' or $type eq 'group' or $type eq 'suspend') {
  73. push @stack, $ref;
  74. push @{$ref->[-1]}, [];
  75. $ref = $ref->[-1][-1];
  76. push @{$ref}, [];
  77. }
  78. elsif ($type eq 'branch') {
  79. $#{$ref->[-1]} == -1 or push @{$ref}, [];
  80. }
  81. elsif ($type eq 'exact' or $type eq 'exactf') {
  82. push @{$ref->[-1]}, $node->data x $min;
  83. }
  84. elsif ($type eq 'anyof' and $min > 0) {
  85. my $regex = $node->qr;
  86. foreach my $c (0 .. 1000000) {
  87. if (chr($c) =~ /$regex/) {
  88. push @{$ref->[-1]}, chr($c) x $min;
  89. last;
  90. }
  91. }
  92. }
  93. $last_depth = $depth;
  94. }
  95. use Data::Dump qw(pp);
  96. pp @strings;
  97. # TODO: join the @strings into real $strings