synthesise.c 12 KB


  1. /*
  2. *
  3. * Copyright (C) 2017 Cafe Beverage. All rights reserved.
  4. *
  5. * This program is free software and is provided to you under the terms of the
  6. * GNU General Public License version 2 as published by the Free Software
  7. * Foundation, and any use by you of this program is subject to the terms
  8. * of such GNU licence.
  9. *
  10. * A copy of the licence is included with the program, and can also be obtained
  11. * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  12. * Boston, MA 02110-1301, USA.
  13. *
  14. */
  15. #include <stdlib.h>
  16. #include <memory.h>
  17. #include <pandriver.h>
  /* Byte offset added to the scratchpad address; used for the set-value job's
   * output address and for the MFBD block2 entries. */
  #define SV_OFFSET 0x4000

  /* Floats per vertex (x, y, z); determines the attribute element size. */
  #define XYZ_COMPONENT_COUNT 3

  /* job_index written into the fragment job's descriptor header. */
  #define INDEX_FRAGMENT 1
  21. int atom_count = 0;
  22. struct mali_jd_dependency no_dependency = {
  23. .atom_id = 0,
  24. .dependency_type = MALI_JD_DEP_TYPE_INVALID
  25. };
  26. struct job_descriptor_header* set_value_helper(uint64_t out)
  27. {
  28. void* packet = galloc(sizeof(struct job_descriptor_header) +
  29. sizeof(struct payload_set_value));
  30. struct job_descriptor_header header = {
  31. .exception_status = JOB_NOT_STARTED,
  32. .job_descriptor_size = JOB_64_BIT,
  33. .job_type = JOB_TYPE_SET_VALUE
  34. };
  35. struct payload_set_value payload = {
  36. .out = out,
  37. .unknown = 0x03
  38. };
  39. memcpy(packet, &header, sizeof(header));
  40. memcpy(packet + sizeof(header), &payload, sizeof(payload));
  41. return packet;
  42. }
  43. uint64_t make_mfbd(bool tiler, uint64_t heap_free_address, uint64_t scratchpad)
  44. {
  45. struct tentative_mfbd *mfbd = galloc(sizeof(struct tentative_mfbd));
  46. uint64_t sab0 = 0x5ABA5ABA;
  47. mfbd->block2[0] = scratchpad + SV_OFFSET;
  48. mfbd->block2[1] = scratchpad + SV_OFFSET + 0x200;
  49. mfbd->ugaT = scratchpad;
  50. mfbd->unknown2 = heap_free_address | 0x8000000;
  51. mfbd->flags = 0xF0;
  52. mfbd->heap_free_address = heap_free_address;
  53. mfbd->blah = 0x1F00000000;
  54. mfbd->unknown1 = 0x1600;
  55. if (!tiler)
  56. mfbd->unknown3 = 0xFFFFF8C0;
  57. mfbd->block1[4] = 0x02D801C2;
  58. mfbd->block1[6] = 0x02D801C2;
  59. /* This might not a tiler issue so much as a which-frame issue.
  60. * First tiler is 0xFF form. Rest of C021. All fragment C021.
  61. * TODO: Investigate!
  62. */
  63. mfbd->block1[7] = tiler ? 0x04001080 : 0x01001080;
  64. mfbd->block1[8] = tiler ? 0x000000FF : 0xC0210000;
  65. mfbd->block1[9] = tiler ? 0x3F800000 : 0x00000000;
  66. uint64_t block3[] = {
  67. 0x0000000000000000,
  68. 0x0000000000030005,
  69. sab0,
  70. mfbd->block2[0],
  71. 0x0000000000000003,
  72. 0x0000000000000000,
  73. 0x0000000000000000,
  74. 0x0000000000000000,
  75. sab0 + 0x300,
  76. };
  77. memcpy(mfbd->block3, block3, sizeof(block3));
  78. return (uint32_t) mfbd | MFBD | (tiler ? FBD_TILER : FBD_FRAGMENT);
  79. }
  80. uint32_t job_chain_fragment(int fd, uint64_t framebuffer,
  81. uint64_t heap_free_address, uint64_t scratchpad)
  82. {
  83. void* packet = galloc(sizeof(struct job_descriptor_header)
  84. + sizeof(struct payload_fragment));
  85. struct job_descriptor_header header = {
  86. .exception_status = JOB_NOT_STARTED,
  87. .job_descriptor_size = JOB_32_BIT,
  88. .job_type = JOB_TYPE_FRAGMENT,
  89. .job_index = INDEX_FRAGMENT,
  90. };
  91. struct payload_fragment payload = {
  92. .min_tile_coord = MAKE_TILE_COORD(0, 0, 0),
  93. .max_tile_coord = MAKE_TILE_COORD(29, 45, 0),
  94. .fragment_fbd = make_mfbd(false, heap_free_address, scratchpad)
  95. };
  96. memcpy(packet, &header, sizeof(header));
  97. memcpy(packet + sizeof(header), &payload, sizeof(payload));
  98. struct mali_jd_dependency depTiler = {
  99. .atom_id = atom_count /* last one */,
  100. .dependency_type = MALI_JD_DEP_TYPE_DATA
  101. };
  102. uint64_t* resource = calloc(sizeof(u64), 1);
  103. resource[0] = framebuffer | MALI_EXT_RES_ACCESS_EXCLUSIVE;
  104. /* TODO: free resource */
  105. struct mali_jd_atom_v2 job = {
  106. .jc = (uint32_t) packet,
  107. .ext_res_list = (struct mali_external_resource*) resource /* TODO */,
  108. .nr_ext_res = 1,
  109. .core_req = MALI_JD_REQ_EXTERNAL_RESOURCES | MALI_JD_REQ_FS,
  110. .atom_number = ++atom_count,
  111. .prio = MALI_JD_PRIO_MEDIUM,
  112. .device_nr = 0,
  113. .pre_dep = { depTiler, no_dependency }
  114. };
  115. submit_job(fd, job);
  116. return (uint32_t) packet;
  117. }
  118. uint64_t import_shader(int fd, uint8_t *shader, size_t sz, bool fragment)
  119. {
  120. int pages = 1 + (sz >> PAGE_SHIFT);
  121. uint64_t gpu = alloc_gpu_pages(
  122. fd, pages,
  123. MALI_MEM_PROT_CPU_RD | MALI_MEM_PROT_CPU_WR | MALI_MEM_PROT_GPU_RD |
  124. MALI_MEM_PROT_GPU_EX);
  125. uint8_t *cpu = mmap_gpu(fd, gpu, pages);
  126. memcpy(cpu, shader, sz);
  127. /* TODO: munmap */
  128. return gpu | SHADER | (fragment ? SHADER_FRAGMENT : SHADER_VERTEX);
  129. }
  130. uint32_t upload_vertices(float *vertices, size_t sz)
  131. {
  132. struct attribute_buffer *vb =
  133. (struct attribute_buffer*) galloc(sizeof(*vb));
  134. float *verts = galloc(sz);
  135. memcpy(verts, vertices, sz);
  136. vb->elements = (uint64_t) (uintptr_t) verts;
  137. vb->element_size = sizeof(float) * XYZ_COMPONENT_COUNT;
  138. vb->total_size = sz;
  139. vb->elements |= 1; /* TODO flags */
  140. return (uint32_t) vb;
  141. }
  142. struct job_descriptor_header* vertex_tiler_helper(int fd, bool tiler,
  143. uint32_t fbd,
  144. uint32_t vertex_buffer,
  145. uint32_t zero_buffer,
  146. uint32_t mode,
  147. void *shader,
  148. size_t shader_size)
  149. {
  150. void* packet = galloc(sizeof(struct job_descriptor_header) +
  151. sizeof(struct payload_vertex_tiler32));
  152. struct job_descriptor_header header = {
  153. .exception_status = JOB_NOT_STARTED,
  154. .job_descriptor_size = JOB_32_BIT,
  155. .job_type = tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX
  156. };
  157. /* TODO */
  158. uint32_t mode_gooks = 0x14000000 | (tiler ? (0x030000 | mode) : 0);
  159. uint32_t other_gook = tiler ? 0x00000003 : 0x00000000;
  160. struct payload_vertex_tiler32 payload = {
  161. .block1 = {
  162. 0x00000003, 0x28000000, mode_gooks, 0x00000000,
  163. 0x00000000, other_gook, 0x00000000, 0x00000000,
  164. 0x00000005, 0x00000000, 0x00000000
  165. },
  166. .zeroes = zero_buffer,
  167. .unknown1 = (uint32_t) galloc(16),
  168. .null1 = 0,
  169. .null2 = 0,
  170. .unknown2 = (uint32_t) galloc(32),
  171. .shader = (uint32_t) galloc(sizeof(struct shader_meta)),
  172. .attributes = vertex_buffer,
  173. .attribute_meta = (uint32_t) galloc(16), /* TODO */
  174. .unknown5 = (uint32_t) galloc(32),
  175. .unknown6 = (uint32_t) galloc(64),
  176. .nullForVertex = tiler ? (uint32_t) galloc(64) : 0,
  177. .null4 = 0,
  178. .fbd = fbd,
  179. .unknown7 = tiler ? 0 : ((uint32_t) galloc(64) | 1) /* TODO */
  180. };
  181. struct shader_meta *s = (struct shader_meta*) payload.shader;
  182. s->shader = import_shader(fd, shader, shader_size, tiler);
  183. if (!tiler) {
  184. uint32_t ni[] = {
  185. 0x43200000, 0x42F00000, 0x3F000000, 0x00000000,
  186. 0x43200000, 0x42F00000, 0x3F000000, 0x00000000
  187. };
  188. memcpy((void*) payload.unknown2, ni, sizeof(ni));
  189. }
  190. if (tiler) {
  191. /* Lose precision... on purpose? */
  192. payload.unknown7 = (uint32_t) s->shader;
  193. }
  194. payload.unknown7 = tiler ? 0xDEADBA00 : 0xDEADFA00;
  195. /* TODO: Decode me! */
  196. if (tiler) {
  197. s->unknown1 = 0x0007000000000000;
  198. s->unknown2 = 0x0000000000020602;
  199. } else {
  200. s->unknown1 = 0x0005000100000000;
  201. s->unknown2 = 0x0000000000420002;
  202. }
  203. /* TODO: Generate on the fly (see trace.c) */
  204. uint32_t *p = (uint32_t*) payload.attribute_meta;
  205. *p = 0x2DEA2200;
  206. /* I have *no* idea */
  207. uint64_t pi[] = {
  208. 0x0000000017E49000, 0x0000000017E49000,
  209. 0x0000000017E49000, 0x0000000017E49000,
  210. 0x00000000179A2200, 0x0000000017E49000,
  211. 0x0000000017E49000
  212. };
  213. memcpy((void*) payload.unknown6, pi, sizeof(pi));
  214. if (tiler) {
  215. uint32_t ni[] = {
  216. 0xFF800000, 0xFF800000,
  217. 0x7F800000, 0x7F800000,
  218. 0x00000000, 0x3F800000,
  219. 0x00000000, 0x00EF013F,
  220. 0x00000000, 0x0000001F,
  221. 0x02020000, 0x00000001
  222. };
  223. memcpy((void*) payload.nullForVertex, ni, sizeof(ni));
  224. }
  225. /* Use some magic numbers from the traces */
  226. uint64_t* unk1 = (uint64_t*) payload.unknown1;
  227. /* unk1[0] = 0x000000B296271001;
  228. unk1[1] = 0x000000B296273000; */
  229. unk1[0] = 0x5a5a5a5a5a5a1001;
  230. unk1[1] = 0x5a5a5a5a5a5a3000;
  231. uint32_t writeBuffer = (uint32_t) galloc(64);
  232. uint64_t* unk5 = (uint64_t*) payload.unknown5;
  233. unk5[0] = ((uint64_t) (tiler ? 0xDB : 0x7A) << 56) | writeBuffer | 1;
  234. unk5[1] = 0x0000004000000010;
  235. if (tiler) {
  236. uint32_t ni[] = {
  237. 0x00000001, 0x00000000, 0x00070000, 0x00020602,
  238. 0x00000000, 0x00000000, 0x00000000, 0x3712FFFF,
  239. 0x44F0FFFF, 0x0007FF00, 0x0007FF00, 0x00000000,
  240. 0x00000000, 0x00000000, 0x00000000, 0x00000200,
  241. 0x00000000, 0xF0122122, 0x00000000, 0x00000000,
  242. 0x00000000, 0xF0122122, 0x00000000, 0xFF800000,
  243. 0xFF800000, 0x7F800000, 0x7F800000, 0x00000000,
  244. 0x3F800000, 0x00000000, 0xEF013F00, 0x00000000,
  245. 0x0000001F, 0x02020000, 0x00000001, 0x00000000
  246. };
  247. memcpy(payload.block2, ni, sizeof(ni));
  248. } else {
  249. uint32_t ni[] = {
  250. 0x00000000, 0x0000000C, 0x00000030, 0x2DEA2200,
  251. 0x00000000, 0x00000000, 0x00000000, /* Address to 1 */ 0xCAFEDA01,
  252. 0x57000000, 0x00000010, 0x00000040, 0x17E49000,
  253. 0x00000000, 0x17E49000, 0x00000000, 0x17E49000,
  254. 0x00000000, 0x17E49000, 0x00000000, 0x179A2200,
  255. 0x00000000, 0x17E49000, 0x00000000, 0x17E49000,
  256. 0x00000000, 0x00000000, 0x00000000, 0x43200000,
  257. 0x42F00000, 0x3F000000, 0x00000000, 0x43200000,
  258. 0x42F00000, 0x3F000000, 0x00000000, 0x00000000
  259. };
  260. memcpy(payload.block2, ni, sizeof(ni));
  261. }
  262. /* Trap tiler job execution */
  263. if (tiler) {
  264. payload.shader = 0x5AB00A05;
  265. /* Hit second */
  266. //payload.zeroes = 0x5AB01A00;
  267. payload.unknown1 = 0x5AB02A00;
  268. payload.unknown2 = 0x5AB03A00;
  269. payload.attributes = 0x5AB04A00;
  270. payload.attribute_meta = 0x5AB05A00;
  271. payload.unknown5 = 0x5AB06A00;
  272. payload.unknown6 = 0x5AB07A00;
  273. payload.unknown7 = 0x5AB0DA00;
  274. /* Hit third */
  275. //payload.fbd = 0x5AB09A00;
  276. /* Hit first */
  277. // payload.nullForVertex = 0x5AB08A00;
  278. }
  279. memcpy(packet, &header, sizeof(header));
  280. memcpy(packet + sizeof(header), &payload, sizeof(payload));
  281. return packet;
  282. }
  283. uint32_t job_chain_vertex_tiler(int fd, float *vertices, size_t vertex_size,
  284. int mode, void* vertex_shader, size_t vs_sz,
  285. void *fragment_shader, size_t fs_sz,
  286. uint64_t heap_free_address, uint64_t scratchpad)
  287. {
  288. uint32_t vertex_buffer = upload_vertices(vertices, vertex_size);
  289. uint32_t vertex_fbd = (uint32_t) make_mfbd(true, heap_free_address,
  290. scratchpad);
  291. uint32_t zero_buffer = (uint32_t) alloc_gpu_pages(
  292. fd, 0x20,
  293. 0x3800 | MALI_MEM_PROT_CPU_RD | MALI_MEM_PROT_CPU_WR |
  294. MALI_MEM_PROT_GPU_RD);
  295. struct job_descriptor_header *set =
  296. set_value_helper(scratchpad + SV_OFFSET);
  297. struct job_descriptor_header *vertex =
  298. vertex_tiler_helper(fd, false,
  299. vertex_fbd, vertex_buffer,
  300. zero_buffer, mode,
  301. vertex_shader, vs_sz);
  302. struct job_descriptor_header *tiler =
  303. vertex_tiler_helper(fd, true,
  304. vertex_fbd, vertex_buffer,
  305. zero_buffer, mode,
  306. fragment_shader, fs_sz);
  307. set->next_job._32 = (uint32_t) vertex;
  308. vertex->next_job._32 = (uint32_t) tiler;
  309. /* TODO: Determine if these numbers are meaningful */
  310. set->job_index = 3;
  311. vertex->job_index = 1;
  312. tiler->job_index = 2;
  313. vertex->job_dependency_index_2 = set->job_index;
  314. tiler->job_dependency_index_1 = vertex->job_index;
  315. struct mali_jd_atom_v2 job = {
  316. .jc = (uint32_t) set,
  317. .ext_res_list = NULL,
  318. .nr_ext_res = 0,
  319. .core_req =
  320. MALI_JD_REQ_CS | MALI_JD_REQ_T | MALI_JD_REQ_CF |
  321. MALI_JD_REQ_COHERENT_GROUP,
  322. .atom_number = ++atom_count,
  323. .prio = MALI_JD_PRIO_MEDIUM,
  324. .device_nr = 0,
  325. .pre_dep = { no_dependency, no_dependency }
  326. };
  327. submit_job(fd, job);
  328. return (uint32_t) tiler;
  329. }
  330. void job_chain_replay(int fd, uint32_t tiler_jc, uint32_t fragment_jc,
  331. uint64_t heap_free_address, uint64_t framebuffer)
  332. {
  333. struct mali_jd_replay_payload *payload;
  334. struct mali_jd_dependency depFragment = {
  335. .atom_id = atom_count,
  336. .dependency_type = MALI_JD_DEP_TYPE_DATA
  337. };
  338. uint64_t *resource;
  339. payload = (struct mali_jd_replay_payload*) galloc(sizeof(*payload));
  340. payload->tiler_jc_list = tiler_jc;
  341. payload->fragment_jc = fragment_jc;
  342. payload->tiler_heap_free = heap_free_address;
  343. payload->fragment_hierarchy_mask = 0;
  344. payload->tiler_hierarchy_mask = 0;
  345. payload->hierarchy_default_weight = 0x10000;
  346. payload->tiler_core_req = MALI_JD_REQ_T | MALI_JD_REQ_COHERENT_GROUP;
  347. payload->fragment_core_req = MALI_JD_REQ_FS;
  348. resource = malloc(sizeof(u64) * 1);
  349. resource[0] = framebuffer | MALI_EXT_RES_ACCESS_EXCLUSIVE;
  350. struct mali_jd_atom_v2 job = {
  351. .jc = (uint32_t) payload,
  352. .ext_res_list = (struct mali_external_resource*)resource,
  353. .nr_ext_res = 1,
  354. .core_req =
  355. MALI_JD_REQ_EXTERNAL_RESOURCES | MALI_JD_REQ_SOFT_REPLAY,
  356. .atom_number = ++atom_count,
  357. .prio = MALI_JD_PRIO_LOW,
  358. .device_nr = 0,
  359. .pre_dep = { depFragment, no_dependency }
  360. };
  361. submit_job(fd, job);
  362. }