diff.cpp 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  #include <algorithm>
  #include <cstdio>
  #include <deque>
  #include <iterator>
  #include <stdexcept>
  #include <string>
  #include <vector>
  #include "simple/file.hpp" // TODO: passing . . as filenames gets bad_alloc thrown, replace this with simple.io and see what happens instead.
  #include "simple/geom/vector.hpp"
  #include "simple/geom/bool_algebra.hpp"
  #include "simple/geom/segment.hpp"
  #include "simple/support/arithmetic.hpp"
  #include "simple/support/enum.hpp"
  #include "simple/support/misc.hpp"
  #include "simple/support/algorithm/split.hpp"
  #include "simple/support/iterator/match.hpp"
  16. using namespace simple::file;
  17. using index_type = simple::geom::vector<size_t, 2>;
  18. using double_buffer = std::pair<std::vector<char>, std::vector<char>>;
  // Command-line switches understood by process_arguments.
  // NOTE(review): the enumerator order presumably has to match the row order of
  // the Option string map defined right after this enum - confirm before reordering.
  enum class Options
  {
      Wordwise, // compare whitespace-separated words
      Linewise, // compare "\n"-separated lines
      Distance, // the next argument is the minimum match distance
      Invalid   // anything that is not a known switch
  };
  26. using Option = simple::support::mapped_enum<Options, Options::Invalid, 2>;
  27. template <> Option::guts::map_type Option::guts::map
  28. {{
  29. { "-w"s, "--words"s },
  30. { "-l"s, "--lines"s },
  31. { "-d"s, "--distance"s },
  32. }};
  33. template <typename Buffers>
  34. index_type get_size(const Buffers& buffers)
  35. {
  36. return index_type(buffers.first.size(), buffers.second.size());
  37. }
  38. template <typename Buffers>
  39. bool diff_at(const Buffers& buffers, index_type position)
  40. {
  41. return buffers.first[position.x()] != buffers.second[position.y()];
  42. }
  43. template <typename Buffers>
  44. index_type find_change(const Buffers& buffers, index_type start = index_type::zero()) // NOTE: this is almost std::find_if, except the != bound comparison is not sufficient in this case... such a shame...
  45. {
  46. auto size = get_size(buffers);
  47. while(start < size)
  48. {
  49. if(diff_at(buffers, start))
  50. break;
  51. ++start;
  52. }
  53. return start;
  54. }
  55. template <typename Buffers>
  56. std::pair<index_type, index_type> measure_change(const Buffers& buffers, index_type start, size_t min_distance)
  57. {
  58. auto remaining = get_size(buffers) - start;
  59. auto minmax = std::minmax_element(remaining.begin(), remaining.end());
  60. auto min_index = index_type::unit(minmax.first - remaining.begin());
  61. auto max_index = index_type::unit(minmax.second - remaining.begin());
  62. size_t change_size = 1;
  63. auto change = max_index * change_size;
  64. auto step = - max_index + min_index;
  65. while(change < remaining)
  66. {
  67. do
  68. {
  69. if(not diff_at(buffers, start + change))
  70. {
  71. auto next_change = find_change(buffers, start + change);
  72. auto distance = next_change - (start + change);
  73. if(not (distance < index_type::one(min_distance)))
  74. return {change, next_change};
  75. }
  76. change += step;
  77. }
  78. while(change < remaining);
  79. ++change_size;
  80. size_t excess; // NOTE: don't you hate it when edge cases are not cleanly handled by the main loop?... must figure this out...
  81. if(simple::support::sub_overflow(excess, change_size, *minmax.second - 1))
  82. excess = 0;
  83. change = max_index * (change_size - excess) + min_index * excess;
  84. }
  85. return {remaining, get_size(buffers)};
  86. }
  87. void showChange(index_type position, index_type change)
  88. {
  89. using simple::support::to_string;
  90. for(size_t i = 0; i < index_type::dimensions; ++i)
  91. std::puts( to_string<index_type::value_type>(simple::geom::segment<size_t>{change[i], position[i]}, ':').c_str());
  92. }
  93. auto split(std::vector<char> in, const std::string separator)
  94. {
  95. std::vector<std::string> ret;
  96. if(separator == "")
  97. simple::support::split(in, simple::support::match_iterator(simple::support::is_space), std::back_inserter(ret));
  98. else
  99. simple::support::split(in, separator, std::back_inserter(ret));
  100. return ret;
  101. }
  102. void diff(std::array<std::string,2> filenames, const char* separator, size_t distance)
  103. {
  104. const auto buffers = std::make_pair( dump(bropex(filenames[0])), dump(bropex(filenames[1])) );
  105. auto do_diff = [distance](const auto& buffers)
  106. {
  107. auto size = get_size(buffers);
  108. auto it = find_change(buffers, index_type::zero());
  109. while(it < size)
  110. {
  111. auto [change, next] = measure_change(buffers, it, distance);
  112. showChange(it, change);
  113. it = next;
  114. }
  115. auto change = size - it;
  116. if(index_type::zero() != change)
  117. showChange(it, change);
  118. };
  119. if(separator) do_diff(std::pair{ split(buffers.first, separator), split(buffers.second, separator) });
  120. else do_diff(buffers);
  121. }
  122. void process_arguments(std::deque<string> args)
  123. {
  124. std::array<std::string,2> filenames;
  125. size_t file_count = 0;
  126. size_t distance = 0;
  127. const char* separator = nullptr;
  128. args.pop_front();
  129. bool diffed = false;
  130. while(!args.empty())
  131. {
  132. switch(Option(args.front()))
  133. {
  134. case Options::Distance:
  135. args.pop_front();
  136. distance = simple::support::ston<size_t>(args.at(0));
  137. break;
  138. case Options::Wordwise: separator = ""; break;
  139. case Options::Linewise: separator = "\n"; break;
  140. default:
  141. filenames[file_count++] = args.at(0);
  142. if(file_count == 2)
  143. {
  144. if(diffed)
  145. std::puts("");
  146. else
  147. diffed = true;
  148. diff(filenames, separator, distance);
  149. file_count = 0;
  150. }
  151. break;
  152. }
  153. args.pop_front();
  154. }
  155. if(not diffed)
  156. std::fputs("Specify 2 files to diff!\n", stderr);
  157. }
  158. int main(int argc, char const* argv[]) try
  159. {
  160. process_arguments({argv, argv + argc});
  161. return 0;
  162. }
  163. catch(...)
  164. {
  165. if(errno) std::perror("Oh nooo!");
  166. throw;
  167. }