TextureConverter.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. // Copyright 2008 Dolphin Emulator Project
  2. // Licensed under GPLv2+
  3. // Refer to the license.txt file included.
  4. // Fast image conversion using OpenGL shaders.
  5. #include <string>
  6. #include "Common/FileUtil.h"
  7. #include "Common/StringUtil.h"
  8. #include "Core/HW/Memmap.h"
  9. #include "VideoBackends/OGL/FramebufferManager.h"
  10. #include "VideoBackends/OGL/ProgramShaderCache.h"
  11. #include "VideoBackends/OGL/Render.h"
  12. #include "VideoBackends/OGL/SamplerCache.h"
  13. #include "VideoBackends/OGL/TextureCache.h"
  14. #include "VideoBackends/OGL/TextureConverter.h"
  15. #include "VideoCommon/DriverDetails.h"
  16. #include "VideoCommon/ImageWrite.h"
  17. #include "VideoCommon/TextureConversionShader.h"
  18. #include "VideoCommon/VideoConfig.h"
  19. namespace OGL
  20. {
  21. namespace TextureConverter
  22. {
  23. using OGL::TextureCache;
  24. static GLuint s_texConvFrameBuffer[2] = {0,0};
  25. static GLuint s_srcTexture = 0; // for decoding from RAM
  26. static GLuint s_dstTexture = 0; // for encoding to RAM
  27. const int renderBufferWidth = EFB_WIDTH * 4;
  28. const int renderBufferHeight = 1024;
  29. static SHADER s_rgbToYuyvProgram;
  30. static int s_rgbToYuyvUniform_loc;
  31. static SHADER s_yuyvToRgbProgram;
  32. // Not all slots are taken - but who cares.
  33. const u32 NUM_ENCODING_PROGRAMS = 64;
  34. static SHADER s_encodingPrograms[NUM_ENCODING_PROGRAMS];
  35. static int s_encodingUniforms[NUM_ENCODING_PROGRAMS];
  36. static GLuint s_PBO = 0; // for readback with different strides
  37. static void CreatePrograms()
  38. {
  39. /* TODO: Accuracy Improvements
  40. *
  41. * This shader doesn't really match what the GameCube does internally in the
  42. * copy pipeline.
  43. * 1. It uses OpenGL's built in filtering when yscaling, someone could work
  44. * out how the copypipeline does it's filtering and implement it correctly
  45. * in this shader.
  46. * 2. Deflickering isn't implemented, a futher filtering over 3 lines.
  47. * Isn't really needed on non-interlaced monitors (and would lower quality;
  48. * But hey, accuracy!)
  49. * 3. Flipper's YUYV conversion implements a 3 pixel horizontal blur on the
  50. * UV channels, centering the U channel on the Left pixel and the V channel
  51. * on the Right pixel.
  52. * The current implementation Centers both UV channels at the same place
  53. * inbetween the two Pixels, and only blurs over these two pixels.
  54. */
  55. // Output is BGRA because that is slightly faster than RGBA.
  56. const char *VProgramRgbToYuyv =
  57. "out vec2 uv0;\n"
  58. "uniform vec4 copy_position;\n" // left, top, right, bottom
  59. "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
  60. "void main()\n"
  61. "{\n"
  62. " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n"
  63. " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n"
  64. " uv0 = mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, 0).xy);\n"
  65. "}\n";
  66. const char *FProgramRgbToYuyv =
  67. "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
  68. "in vec2 uv0;\n"
  69. "out vec4 ocol0;\n"
  70. "void main()\n"
  71. "{\n"
  72. " vec3 c0 = texture(samp9, vec3(uv0 - dFdx(uv0) * 0.25, 0.0)).rgb;\n"
  73. " vec3 c1 = texture(samp9, vec3(uv0 + dFdx(uv0) * 0.25, 0.0)).rgb;\n"
  74. " vec3 c01 = (c0 + c1) * 0.5;\n"
  75. " vec3 y_const = vec3(0.257,0.504,0.098);\n"
  76. " vec3 u_const = vec3(-0.148,-0.291,0.439);\n"
  77. " vec3 v_const = vec3(0.439,-0.368,-0.071);\n"
  78. " vec4 const3 = vec4(0.0625,0.5,0.0625,0.5);\n"
  79. " ocol0 = vec4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n"
  80. "}\n";
  81. ProgramShaderCache::CompileShader(s_rgbToYuyvProgram, VProgramRgbToYuyv, FProgramRgbToYuyv);
  82. s_rgbToYuyvUniform_loc = glGetUniformLocation(s_rgbToYuyvProgram.glprogid, "copy_position");
  83. /* TODO: Accuracy Improvements
  84. *
  85. * The YVYU to RGB conversion here matches the RGB to YUYV done above, but
  86. * if a game modifies or adds images to the XFB then it should be using the
  87. * same algorithm as the flipper, and could result in slight color inaccuracies
  88. * when run back through this shader.
  89. */
  90. const char *VProgramYuyvToRgb =
  91. "void main()\n"
  92. "{\n"
  93. " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n"
  94. " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n"
  95. "}\n";
  96. const char *FProgramYuyvToRgb =
  97. "SAMPLER_BINDING(9) uniform sampler2D samp9;\n"
  98. "in vec2 uv0;\n"
  99. "out vec4 ocol0;\n"
  100. "void main()\n"
  101. "{\n"
  102. " ivec2 uv = ivec2(gl_FragCoord.xy);\n"
  103. // We switch top/bottom here. TODO: move this to screen blit.
  104. " ivec2 ts = textureSize(samp9, 0);\n"
  105. " vec4 c0 = texelFetch(samp9, ivec2(uv.x>>1, ts.y-uv.y-1), 0);\n"
  106. " float y = mix(c0.b, c0.r, (uv.x & 1) == 1);\n"
  107. " float yComp = 1.164 * (y - 0.0625);\n"
  108. " float uComp = c0.g - 0.5;\n"
  109. " float vComp = c0.a - 0.5;\n"
  110. " ocol0 = vec4(yComp + (1.596 * vComp),\n"
  111. " yComp - (0.813 * vComp) - (0.391 * uComp),\n"
  112. " yComp + (2.018 * uComp),\n"
  113. " 1.0);\n"
  114. "}\n";
  115. ProgramShaderCache::CompileShader(s_yuyvToRgbProgram, VProgramYuyvToRgb, FProgramYuyvToRgb);
  116. }
  117. static SHADER &GetOrCreateEncodingShader(u32 format)
  118. {
  119. if (format >= NUM_ENCODING_PROGRAMS)
  120. {
  121. PanicAlert("Unknown texture copy format: 0x%x\n", format);
  122. return s_encodingPrograms[0];
  123. }
  124. if (s_encodingPrograms[format].glprogid == 0)
  125. {
  126. const char* shader = TextureConversionShader::GenerateEncodingShader(format, API_OPENGL);
  127. #if defined(_DEBUG) || defined(DEBUGFAST)
  128. if (g_ActiveConfig.iLog & CONF_SAVESHADERS && shader)
  129. {
  130. static int counter = 0;
  131. std::string filename = StringFromFormat("%senc_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
  132. SaveData(filename, shader);
  133. }
  134. #endif
  135. const char *VProgram =
  136. "void main()\n"
  137. "{\n"
  138. " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n"
  139. " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n"
  140. "}\n";
  141. ProgramShaderCache::CompileShader(s_encodingPrograms[format], VProgram, shader);
  142. s_encodingUniforms[format] = glGetUniformLocation(s_encodingPrograms[format].glprogid, "position");
  143. }
  144. return s_encodingPrograms[format];
  145. }
  146. void Init()
  147. {
  148. glGenFramebuffers(2, s_texConvFrameBuffer);
  149. glActiveTexture(GL_TEXTURE9);
  150. glGenTextures(1, &s_srcTexture);
  151. glBindTexture(GL_TEXTURE_2D, s_srcTexture);
  152. glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
  153. glGenTextures(1, &s_dstTexture);
  154. glBindTexture(GL_TEXTURE_2D, s_dstTexture);
  155. glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
  156. glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, renderBufferWidth, renderBufferHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
  157. FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[0]);
  158. glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_dstTexture, 0);
  159. FramebufferManager::SetFramebuffer(0);
  160. glGenBuffers(1, &s_PBO);
  161. CreatePrograms();
  162. }
  163. void Shutdown()
  164. {
  165. glDeleteTextures(1, &s_srcTexture);
  166. glDeleteTextures(1, &s_dstTexture);
  167. glDeleteBuffers(1, &s_PBO);
  168. glDeleteFramebuffers(2, s_texConvFrameBuffer);
  169. s_rgbToYuyvProgram.Destroy();
  170. s_yuyvToRgbProgram.Destroy();
  171. for (auto& program : s_encodingPrograms)
  172. program.Destroy();
  173. s_srcTexture = 0;
  174. s_dstTexture = 0;
  175. s_PBO = 0;
  176. s_texConvFrameBuffer[0] = 0;
  177. s_texConvFrameBuffer[1] = 0;
  178. }
  179. static void EncodeToRamUsingShader(GLuint srcTexture,
  180. u8* destAddr, int dstWidth, int dstHeight, int readStride,
  181. bool linearFilter)
  182. {
  183. // switch to texture converter frame buffer
  184. // attach render buffer as color destination
  185. FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[0]);
  186. OpenGL_BindAttributelessVAO();
  187. // set source texture
  188. glActiveTexture(GL_TEXTURE9);
  189. glBindTexture(GL_TEXTURE_2D_ARRAY, srcTexture);
  190. if (linearFilter)
  191. g_sampler_cache->BindLinearSampler(9);
  192. else
  193. g_sampler_cache->BindNearestSampler(9);
  194. glViewport(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight);
  195. glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
  196. // .. and then read back the results.
  197. // TODO: make this less slow.
  198. int writeStride = bpmem.copyMipMapStrideChannels * 32;
  199. int dstSize = dstWidth*dstHeight*4;
  200. int readHeight = readStride / dstWidth / 4; // 4 bytes per pixel
  201. int readLoops = dstHeight / readHeight;
  202. if (writeStride != readStride && readLoops > 1)
  203. {
  204. // writing to a texture of a different size
  205. // also copy more then one block line, so the different strides matters
  206. // copy into one pbo first, map this buffer, and then memcpy into GC memory
  207. // in this way, we only have one vram->ram transfer, but maybe a bigger
  208. // CPU overhead because of the pbo
  209. glBindBuffer(GL_PIXEL_PACK_BUFFER, s_PBO);
  210. glBufferData(GL_PIXEL_PACK_BUFFER, dstSize, nullptr, GL_STREAM_READ);
  211. glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, nullptr);
  212. u8* pbo = (u8*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, dstSize, GL_MAP_READ_BIT);
  213. for (int i = 0; i < readLoops; i++)
  214. {
  215. memcpy(destAddr, pbo, readStride);
  216. pbo += readStride;
  217. destAddr += writeStride;
  218. }
  219. glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
  220. glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
  221. }
  222. else
  223. {
  224. glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr);
  225. }
  226. }
  227. int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, const EFBRectangle& source)
  228. {
  229. u32 format = copyfmt;
  230. if (bFromZBuffer)
  231. {
  232. format |= _GX_TF_ZTF;
  233. if (copyfmt == 11)
  234. format = GX_TF_Z16;
  235. else if (format < GX_TF_Z8 || format > GX_TF_Z24X8)
  236. format |= _GX_TF_CTF;
  237. }
  238. else
  239. {
  240. if (copyfmt > GX_TF_RGBA8 || (copyfmt < GX_TF_RGB565 && !bIsIntensityFmt))
  241. format |= _GX_TF_CTF;
  242. }
  243. SHADER& texconv_shader = GetOrCreateEncodingShader(format);
  244. u8 *dest_ptr = Memory::GetPointer(address);
  245. int width = (source.right - source.left) >> bScaleByHalf;
  246. int height = (source.bottom - source.top) >> bScaleByHalf;
  247. int size_in_bytes = TexDecoder_GetTextureSizeInBytes(width, height, format);
  248. u16 blkW = TexDecoder_GetBlockWidthInTexels(format) - 1;
  249. u16 blkH = TexDecoder_GetBlockHeightInTexels(format) - 1;
  250. // only copy on cache line boundaries
  251. // extra pixels are copied but not displayed in the resulting texture
  252. s32 expandedWidth = (width + blkW) & (~blkW);
  253. s32 expandedHeight = (height + blkH) & (~blkH);
  254. texconv_shader.Bind();
  255. glUniform4i(s_encodingUniforms[format],
  256. source.left, source.top,
  257. expandedWidth, bScaleByHalf ? 2 : 1);
  258. unsigned int numBlocksX = expandedWidth / TexDecoder_GetBlockWidthInTexels(format);
  259. unsigned int numBlocksY = expandedHeight / TexDecoder_GetBlockHeightInTexels(format);
  260. unsigned int cacheLinesPerRow;
  261. if ((format & 0x0f) == 6)
  262. cacheLinesPerRow = numBlocksX * 2;
  263. else
  264. cacheLinesPerRow = numBlocksX;
  265. EncodeToRamUsingShader(source_texture,
  266. dest_ptr, cacheLinesPerRow * 8, numBlocksY, cacheLinesPerRow * 32,
  267. bScaleByHalf > 0 && !bFromZBuffer);
  268. return size_in_bytes; // TODO: D3D11 is calculating this value differently!
  269. }
  270. void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* destAddr, int dstWidth, int dstHeight)
  271. {
  272. g_renderer->ResetAPIState();
  273. s_rgbToYuyvProgram.Bind();
  274. glUniform4f(s_rgbToYuyvUniform_loc, static_cast<float>(sourceRc.left), static_cast<float>(sourceRc.top),
  275. static_cast<float>(sourceRc.right), static_cast<float>(sourceRc.bottom));
  276. // We enable linear filtering, because the GameCube does filtering in the vertical direction when
  277. // yscale is enabled.
  278. // Otherwise we get jaggies when a game uses yscaling (most PAL games)
  279. EncodeToRamUsingShader(srcTexture, destAddr, dstWidth / 2, dstHeight, dstWidth*dstHeight*2, true);
  280. FramebufferManager::SetFramebuffer(0);
  281. TextureCache::DisableStage(0);
  282. g_renderer->RestoreAPIState();
  283. }
  284. // Should be scale free.
  285. void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTexture)
  286. {
  287. u8* srcAddr = Memory::GetPointer(xfbAddr);
  288. if (!srcAddr)
  289. {
  290. WARN_LOG(VIDEO, "Tried to decode from invalid memory address");
  291. return;
  292. }
  293. g_renderer->ResetAPIState(); // reset any game specific settings
  294. OpenGL_BindAttributelessVAO();
  295. // switch to texture converter frame buffer
  296. // attach destTexture as color destination
  297. FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[1]);
  298. FramebufferManager::FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D_ARRAY, destTexture, 0);
  299. // activate source texture
  300. // set srcAddr as data for source texture
  301. glActiveTexture(GL_TEXTURE9);
  302. glBindTexture(GL_TEXTURE_2D, s_srcTexture);
  303. glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, srcWidth / 2, srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);
  304. g_sampler_cache->BindNearestSampler(9);
  305. glViewport(0, 0, srcWidth, srcHeight);
  306. s_yuyvToRgbProgram.Bind();
  307. glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
  308. FramebufferManager::SetFramebuffer(0);
  309. g_renderer->RestoreAPIState();
  310. }
  311. } // namespace
  312. } // namespace OGL