CharacterConverter.cc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. /*
  2. TODO:
  3. - Clean up renderGraphic2, it is currently very hard to understand
  4. with all the masks and quarters etc.
  5. - Correctly implement vertical scroll in text modes.
  6. Can be implemented by reordering blitting, but uses a smaller
  7. wrap than GFX modes: 8 lines instead of 256 lines.
  8. */
  9. #include "CharacterConverter.hh"
  10. #include "VDP.hh"
  11. #include "VDPVRAM.hh"
  12. #include "build-info.hh"
  13. #include "components.hh"
  14. #include <cstdint>
  15. #ifdef __SSE2__
  16. #include "emmintrin.h" // SSE2
  17. #endif
  18. namespace openmsx {
  19. template <class Pixel>
  20. CharacterConverter<Pixel>::CharacterConverter(
  21. VDP& vdp_, const Pixel* palFg_, const Pixel* palBg_)
  22. : vdp(vdp_), vram(vdp.getVRAM()), palFg(palFg_), palBg(palBg_)
  23. {
  24. modeBase = 0; // not strictly needed, but avoids Coverity warning
  25. }
  26. template <class Pixel>
  27. void CharacterConverter<Pixel>::setDisplayMode(DisplayMode mode)
  28. {
  29. modeBase = mode.getBase();
  30. assert(modeBase < 0x0C);
  31. }
  32. template <class Pixel>
  33. void CharacterConverter<Pixel>::convertLine(Pixel* linePtr, int line)
  34. {
  35. // TODO: Support YJK on modes other than Graphic 6/7.
  36. switch (modeBase) {
  37. case DisplayMode::GRAPHIC1: // screen 1
  38. renderGraphic1(linePtr, line);
  39. break;
  40. case DisplayMode::TEXT1: // screen 0, width 40
  41. renderText1(linePtr, line);
  42. break;
  43. case DisplayMode::MULTICOLOR: // screen 3
  44. renderMulti(linePtr, line);
  45. break;
  46. case DisplayMode::GRAPHIC2: // screen 2
  47. renderGraphic2(linePtr, line);
  48. break;
  49. case DisplayMode::GRAPHIC3: // screen 4
  50. renderGraphic2(linePtr, line); // graphic3, actually
  51. break;
  52. case DisplayMode::TEXT2: // screen 0, width 80
  53. renderText2(linePtr, line);
  54. break;
  55. case DisplayMode::TEXT1Q: // TMSxxxx only
  56. if (vdp.isMSX1VDP()) {
  57. renderText1Q(linePtr, line);
  58. } else {
  59. renderBlank (linePtr);
  60. }
  61. break;
  62. case DisplayMode::MULTIQ: // TMSxxxx only
  63. if (vdp.isMSX1VDP()) {
  64. renderMultiQ(linePtr, line);
  65. } else {
  66. renderBlank (linePtr);
  67. }
  68. break;
  69. default: // remaining (non-bitmap) modes
  70. if (vdp.isMSX1VDP()) {
  71. renderBogus(linePtr);
  72. } else {
  73. renderBlank(linePtr);
  74. }
  75. }
  76. }
  #ifdef __SSE2__
  // Copied from Scale2xScaler.cc, TODO move to common location?
  /**
   * Bitwise select: every result bit is taken from a1 where the
   * corresponding mask bit is set and from a0 where it is clear.
   */
  static inline __m128i select(__m128i a0, __m128i a1, __m128i mask)
  {
      const __m128i fromA1 = _mm_and_si128(mask, a1);    //  mask & a1
      const __m128i fromA0 = _mm_andnot_si128(mask, a0); // ~mask & a0
      return _mm_or_si128(fromA1, fromA0);
  }
  #endif
  84. template<typename Pixel> static inline void draw6(
  85. Pixel* __restrict & pixelPtr, Pixel fg, Pixel bg, byte pattern)
  86. {
  87. pixelPtr[0] = (pattern & 0x80) ? fg : bg;
  88. pixelPtr[1] = (pattern & 0x40) ? fg : bg;
  89. pixelPtr[2] = (pattern & 0x20) ? fg : bg;
  90. pixelPtr[3] = (pattern & 0x10) ? fg : bg;
  91. pixelPtr[4] = (pattern & 0x08) ? fg : bg;
  92. pixelPtr[5] = (pattern & 0x04) ? fg : bg;
  93. pixelPtr += 6;
  94. }
  95. template<typename Pixel> static inline void draw8(
  96. Pixel* __restrict & pixelPtr, Pixel fg, Pixel bg, byte pattern)
  97. {
  98. #ifdef __SSE2__
  99. // SSE2 version, 32bpp (16bpp is possible, but not worth it anymore)
  100. if (sizeof(Pixel) == 4) {
  101. const __m128i m74 = _mm_set_epi32(0x10, 0x20, 0x40, 0x80);
  102. const __m128i m30 = _mm_set_epi32(0x01, 0x02, 0x04, 0x08);
  103. const __m128i zero = _mm_setzero_si128();
  104. __m128i fg4 = _mm_set1_epi32(fg);
  105. __m128i bg4 = _mm_set1_epi32(bg);
  106. __m128i pat = _mm_set1_epi32(pattern);
  107. __m128i b74 = _mm_cmpeq_epi32(_mm_and_si128(pat, m74), zero);
  108. __m128i b30 = _mm_cmpeq_epi32(_mm_and_si128(pat, m30), zero);
  109. auto* out = reinterpret_cast<__m128i*>(pixelPtr);
  110. _mm_storeu_si128(out + 0, select(fg4, bg4, b74));
  111. _mm_storeu_si128(out + 1, select(fg4, bg4, b30));
  112. pixelPtr += 8;
  113. return;
  114. }
  115. #endif
  116. // C++ version
  117. pixelPtr[0] = (pattern & 0x80) ? fg : bg;
  118. pixelPtr[1] = (pattern & 0x40) ? fg : bg;
  119. pixelPtr[2] = (pattern & 0x20) ? fg : bg;
  120. pixelPtr[3] = (pattern & 0x10) ? fg : bg;
  121. pixelPtr[4] = (pattern & 0x08) ? fg : bg;
  122. pixelPtr[5] = (pattern & 0x04) ? fg : bg;
  123. pixelPtr[6] = (pattern & 0x02) ? fg : bg;
  124. pixelPtr[7] = (pattern & 0x01) ? fg : bg;
  125. pixelPtr += 8;
  126. }
  127. template <class Pixel>
  128. void CharacterConverter<Pixel>::renderText1(
  129. Pixel* __restrict pixelPtr, int line)
  130. {
  131. Pixel fg = palFg[vdp.getForegroundColor()];
  132. Pixel bg = palFg[vdp.getBackgroundColor()];
  133. // 8 * 256 is small enough to always be contiguous
  134. const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
  135. patternArea += (line + vdp.getVerticalScroll()) & 7;
  136. // Note: Because line width is not a power of two, reading an entire line
  137. // from a VRAM pointer returned by readArea will not wrap the index
  138. // correctly. Therefore we read one character at a time.
  139. unsigned nameStart = (line / 8) * 40;
  140. unsigned nameEnd = nameStart + 40;
  141. for (unsigned name = nameStart; name < nameEnd; ++name) {
  142. unsigned charcode = vram.nameTable.readNP((name + 0xC00) | (~0u << 12));
  143. unsigned pattern = patternArea[charcode * 8];
  144. draw6(pixelPtr, fg, bg, pattern);
  145. }
  146. }
  147. template <class Pixel>
  148. void CharacterConverter<Pixel>::renderText1Q(
  149. Pixel* __restrict pixelPtr, int line)
  150. {
  151. Pixel fg = palFg[vdp.getForegroundColor()];
  152. Pixel bg = palFg[vdp.getBackgroundColor()];
  153. unsigned patternBaseLine = (~0u << 13) | ((line + vdp.getVerticalScroll()) & 7);
  154. // Note: Because line width is not a power of two, reading an entire line
  155. // from a VRAM pointer returned by readArea will not wrap the index
  156. // correctly. Therefore we read one character at a time.
  157. unsigned nameStart = (line / 8) * 40;
  158. unsigned nameEnd = nameStart + 40;
  159. unsigned patternQuarter = (line & 0xC0) << 2;
  160. for (unsigned name = nameStart; name < nameEnd; ++name) {
  161. unsigned charcode = vram.nameTable.readNP((name + 0xC00) | (~0u << 12));
  162. unsigned patternNr = patternQuarter | charcode;
  163. unsigned pattern = vram.patternTable.readNP(
  164. patternBaseLine | (patternNr * 8));
  165. draw6(pixelPtr, fg, bg, pattern);
  166. }
  167. }
  168. template <class Pixel>
  169. void CharacterConverter<Pixel>::renderText2(
  170. Pixel* __restrict pixelPtr, int line)
  171. {
  172. Pixel plainFg = palFg[vdp.getForegroundColor()];
  173. Pixel plainBg = palFg[vdp.getBackgroundColor()];
  174. Pixel blinkFg, blinkBg;
  175. if (vdp.getBlinkState()) {
  176. int fg = vdp.getBlinkForegroundColor();
  177. blinkFg = palBg[fg ? fg : vdp.getBlinkBackgroundColor()];
  178. blinkBg = palBg[vdp.getBlinkBackgroundColor()];
  179. } else {
  180. blinkFg = plainFg;
  181. blinkBg = plainBg;
  182. }
  183. // 8 * 256 is small enough to always be contiguous
  184. const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
  185. patternArea += (line + vdp.getVerticalScroll()) & 7;
  186. unsigned colorStart = (line / 8) * (80 / 8);
  187. unsigned nameStart = (line / 8) * 80;
  188. for (unsigned i = 0; i < (80 / 8); ++i) {
  189. unsigned colorPattern = vram.colorTable.readNP(
  190. (colorStart + i) | (~0u << 9));
  191. const byte* nameArea = vram.nameTable.getReadArea(
  192. (nameStart + 8 * i) | (~0u << 12), 8);
  193. draw6(pixelPtr,
  194. (colorPattern & 0x80) ? blinkFg : plainFg,
  195. (colorPattern & 0x80) ? blinkBg : plainBg,
  196. patternArea[nameArea[0] * 8]);
  197. draw6(pixelPtr,
  198. (colorPattern & 0x40) ? blinkFg : plainFg,
  199. (colorPattern & 0x40) ? blinkBg : plainBg,
  200. patternArea[nameArea[1] * 8]);
  201. draw6(pixelPtr,
  202. (colorPattern & 0x20) ? blinkFg : plainFg,
  203. (colorPattern & 0x20) ? blinkBg : plainBg,
  204. patternArea[nameArea[2] * 8]);
  205. draw6(pixelPtr,
  206. (colorPattern & 0x10) ? blinkFg : plainFg,
  207. (colorPattern & 0x10) ? blinkBg : plainBg,
  208. patternArea[nameArea[3] * 8]);
  209. draw6(pixelPtr,
  210. (colorPattern & 0x08) ? blinkFg : plainFg,
  211. (colorPattern & 0x08) ? blinkBg : plainBg,
  212. patternArea[nameArea[4] * 8]);
  213. draw6(pixelPtr,
  214. (colorPattern & 0x04) ? blinkFg : plainFg,
  215. (colorPattern & 0x04) ? blinkBg : plainBg,
  216. patternArea[nameArea[5] * 8]);
  217. draw6(pixelPtr,
  218. (colorPattern & 0x02) ? blinkFg : plainFg,
  219. (colorPattern & 0x02) ? blinkBg : plainBg,
  220. patternArea[nameArea[6] * 8]);
  221. draw6(pixelPtr,
  222. (colorPattern & 0x01) ? blinkFg : plainFg,
  223. (colorPattern & 0x01) ? blinkBg : plainBg,
  224. patternArea[nameArea[7] * 8]);
  225. }
  226. }
  227. template <class Pixel>
  228. const byte* CharacterConverter<Pixel>::getNamePtr(int line, int scroll)
  229. {
  230. // no need to test whether multi-page scrolling is enabled,
  231. // indexMask in the nameTable already takes care of it
  232. return vram.nameTable.getReadArea(
  233. ((line / 8) * 32) | ((scroll & 0x20) ? 0x8000 : 0), 32);
  234. }
  235. template <class Pixel>
  236. void CharacterConverter<Pixel>::renderGraphic1(
  237. Pixel* __restrict pixelPtr, int line)
  238. {
  239. const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
  240. patternArea += line & 7;
  241. const byte* colorArea = vram.colorTable.getReadArea(0, 256 / 8);
  242. int scroll = vdp.getHorizontalScrollHigh();
  243. const byte* namePtr = getNamePtr(line, scroll);
  244. for (unsigned n = 0; n < 32; ++n) {
  245. unsigned charcode = namePtr[scroll & 0x1F];
  246. unsigned pattern = patternArea[charcode * 8];
  247. unsigned color = colorArea[charcode / 8];
  248. Pixel fg = palFg[color >> 4];
  249. Pixel bg = palFg[color & 0x0F];
  250. draw8(pixelPtr, fg, bg, pattern);
  251. if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
  252. }
  253. }
  254. template <class Pixel>
  255. void CharacterConverter<Pixel>::renderGraphic2(
  256. Pixel* __restrict pixelPtr, int line)
  257. {
  258. int quarter8 = (((line / 8) * 32) & ~0xFF) * 8;
  259. int line7 = line & 7;
  260. int scroll = vdp.getHorizontalScrollHigh();
  261. const byte* namePtr = getNamePtr(line, scroll);
  262. if (vram.colorTable .isContinuous((8 * 256) - 1) &&
  263. vram.patternTable.isContinuous((8 * 256) - 1) &&
  264. ((scroll & 0x1f) == 0)) {
  265. // Both color and pattern table can be accessed contiguously
  266. // (no mirroring) and there's no v9958 horizontal scrolling.
  267. // This is very common, so make an optimized version for this.
  268. const byte* patternArea = vram.patternTable.getReadArea(quarter8, 8 * 256) + line7;
  269. const byte* colorArea = vram.colorTable .getReadArea(quarter8, 8 * 256) + line7;
  270. for (unsigned n = 0; n < 32; ++n) {
  271. unsigned charCode8 = namePtr[n] * 8;
  272. unsigned pattern = patternArea[charCode8];
  273. unsigned color = colorArea [charCode8];
  274. Pixel fg = palFg[color >> 4];
  275. Pixel bg = palFg[color & 0x0F];
  276. draw8(pixelPtr, fg, bg, pattern);
  277. }
  278. } else {
  279. // Slower variant, also works when:
  280. // - there is mirroring in the color table
  281. // - there is mirroring in the pattern table (TMS9929)
  282. // - V9958 horizontal scroll feature is used
  283. int baseLine = (~0u << 13) | quarter8 | line7;
  284. for (unsigned n = 0; n < 32; ++n) {
  285. unsigned charCode8 = namePtr[scroll & 0x1F] * 8;
  286. unsigned index = charCode8 | baseLine;
  287. unsigned pattern = vram.patternTable.readNP(index);
  288. unsigned color = vram.colorTable .readNP(index);
  289. Pixel fg = palFg[color >> 4];
  290. Pixel bg = palFg[color & 0x0F];
  291. draw8(pixelPtr, fg, bg, pattern);
  292. if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
  293. }
  294. }
  295. }
  296. template <class Pixel>
  297. void CharacterConverter<Pixel>::renderMultiHelper(
  298. Pixel* __restrict pixelPtr, int line,
  299. int mask, int patternQuarter)
  300. {
  301. unsigned baseLine = mask | ((line / 4) & 7);
  302. unsigned scroll = vdp.getHorizontalScrollHigh();
  303. const byte* namePtr = getNamePtr(line, scroll);
  304. for (unsigned n = 0; n < 32; ++n) {
  305. unsigned patternNr = patternQuarter | namePtr[scroll & 0x1F];
  306. unsigned color = vram.patternTable.readNP((patternNr * 8) | baseLine);
  307. Pixel cl = palFg[color >> 4];
  308. Pixel cr = palFg[color & 0x0F];
  309. pixelPtr[0] = cl; pixelPtr[1] = cl;
  310. pixelPtr[2] = cl; pixelPtr[3] = cl;
  311. pixelPtr[4] = cr; pixelPtr[5] = cr;
  312. pixelPtr[6] = cr; pixelPtr[7] = cr;
  313. pixelPtr += 8;
  314. if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
  315. }
  316. }
  317. template <class Pixel>
  318. void CharacterConverter<Pixel>::renderMulti(
  319. Pixel* __restrict pixelPtr, int line)
  320. {
  321. int mask = (~0u << 11);
  322. renderMultiHelper(pixelPtr, line, mask, 0);
  323. }
  324. template <class Pixel>
  325. void CharacterConverter<Pixel>::renderMultiQ(
  326. Pixel* __restrict pixelPtr, int line)
  327. {
  328. int mask = (~0u << 13);
  329. int patternQuarter = (line * 4) & ~0xFF; // (line / 8) * 32
  330. renderMultiHelper(pixelPtr, line, mask, patternQuarter);
  331. }
  332. template <class Pixel>
  333. void CharacterConverter<Pixel>::renderBogus(
  334. Pixel* __restrict pixelPtr)
  335. {
  336. Pixel fg = palFg[vdp.getForegroundColor()];
  337. Pixel bg = palFg[vdp.getBackgroundColor()];
  338. for (int n = 8; n--; ) *pixelPtr++ = bg;
  339. for (int c = 40; c--; ) {
  340. for (int n = 4; n--; ) *pixelPtr++ = fg;
  341. for (int n = 2; n--; ) *pixelPtr++ = bg;
  342. }
  343. for (int n = 8; n--; ) *pixelPtr++ = bg;
  344. }
  345. template <class Pixel>
  346. void CharacterConverter<Pixel>::renderBlank(
  347. Pixel* __restrict pixelPtr)
  348. {
  349. // when this is in effect, the VRAM is not refreshed anymore, but that
  350. // is not emulated
  351. for (int n = 256; n--; ) *pixelPtr++ = palFg[15];
  352. }
  // Force template instantiation: the member functions are defined in this
  // file, so each pixel type enabled at build time is instantiated
  // explicitly here.
  #if HAVE_16BPP
  template class CharacterConverter<uint16_t>;
  #endif
  #if HAVE_32BPP || COMPONENT_GL
  template class CharacterConverter<uint32_t>;
  #endif
  360. } // namespace openmsx