// Generated from libavcodec/vulkan/prores_raw.comp
// GLSL compute shader text for the ProRes RAW decoder, embedded as one C
// string (the adjacent literals below are concatenated by the C compiler).
// This file is generated from the .comp source named on its first line, so
// changes presumably belong there rather than in this string.
//
// The text is not self-contained: it uses names it never declares
// (GetBitContext and the bit-reader calls, the dc_cb/ac_cb/rn_cb/ln_cb
// tables, scan, qmat, idct_8x8_scales, tile_data, frame_size, tile_size,
// pkt_data, dst, ...). Presumably a prelude providing them is prepended
// before compilation -- verify against the code that consumes this string.
//
// Contents, in order:
//   idct8_horiz(row) - 8-point inverse transform of one row, reading from
//                      `block` and writing transposed into `btemp`.
//   idct8_vert(row)  - same transform from `btemp` back into `block`, with
//                      0.5 added to the first input of each row.
//   get_value(cb)    - reads one variable-length code from the global bit
//                      reader `gb`; `cb` packs the rice order (bits 0-3),
//                      the exp order (bits 4-7) and the switch bits (8+).
//   read_dc_vals(n)  - decodes coefficient 0 of each of the n blocks into
//                      btemp[COMP_ID][blk][0] as a running sum of deltas.
//   read_ac_vals(n)  - decodes the remaining coefficients as alternating
//                      runs of coded values and skipped positions, stored
//                      at btemp[COMP_ID][n & block_mask][n >> log2_nb_blocks].
//   main()           - one workgroup per tile_data entry. The invocation
//                      with BLOCK_ID == 0 && ROW_ID == 0 of each component
//                      parses the bitstream; after a barrier all invocations
//                      scale the coefficients, run both transform passes and
//                      imageStore() the result to `dst`. PARALLEL_ROWS makes
//                      each invocation transform only its own row instead of
//                      looping over all 8 rows.
const char *ff_source_prores_raw_comp =
"/*\n"
" * ProRes RAW decoder\n"
" *\n"
" * Copyright (c) 2025 Lynne <dev@lynne.ee>\n"
" *\n"
" * This file is part of FFmpeg.\n"
" *\n"
" * FFmpeg is free software; you can redistribute it and/or\n"
" * modify it under the terms of the GNU Lesser General Public\n"
" * License as published by the Free Software Foundation; either\n"
" * version 2.1 of the License, or (at your option) any later version.\n"
" *\n"
" * FFmpeg is distributed in the hope that it will be useful,\n"
" * but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n"
" * Lesser General Public License for more details.\n"
" *\n"
" * You should have received a copy of the GNU Lesser General Public\n"
" * License along with FFmpeg; if not, write to the Free Software\n"
" * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n"
" */\n"
"\n"
"#define I16(x) (int16_t(x))\n"
"\n"
"#define COMP_ID (gl_LocalInvocationID.z)\n"
"#define BLOCK_ID (gl_LocalInvocationID.y)\n"
"#define ROW_ID (gl_LocalInvocationID.x)\n"
"\n"
"GetBitContext gb;\n"
"shared float btemp[gl_WorkGroupSize.z][16][64] = { };\n"
"shared float block[gl_WorkGroupSize.z][16][64];\n"
"\n"
"void idct8_horiz(const uint row_id)\n"
"{\n"
"    float t0, t1, t2, t3, t4, t5, t6, t7, u8;\n"
"    float u0, u1, u2, u3, u4, u5, u6, u7;\n"
"\n"
"    /* Input */\n"
"    t0 = block[COMP_ID][BLOCK_ID][8*row_id + 0];\n"
"    u4 = block[COMP_ID][BLOCK_ID][8*row_id + 1];\n"
"    t2 = block[COMP_ID][BLOCK_ID][8*row_id + 2];\n"
"    u6 = block[COMP_ID][BLOCK_ID][8*row_id + 3];\n"
"    t1 = block[COMP_ID][BLOCK_ID][8*row_id + 4];\n"
"    u5 = block[COMP_ID][BLOCK_ID][8*row_id + 5];\n"
"    t3 = block[COMP_ID][BLOCK_ID][8*row_id + 6];\n"
"    u7 = block[COMP_ID][BLOCK_ID][8*row_id + 7];\n"
"\n"
"    /* Embedded scaled inverse 4-point Type-II DCT */\n"
"    u0 = t0 + t1;\n"
"    u1 = t0 - t1;\n"
"    u3 = t2 + t3;\n"
"    u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3;\n"
"    t0 = u0 + u3;\n"
"    t3 = u0 - u3;\n"
"    t1 = u1 + u2;\n"
"    t2 = u1 - u2;\n"
"\n"
"    /* Embedded scaled inverse 4-point Type-IV DST */\n"
"    t5 = u5 + u6;\n"
"    t6 = u5 - u6;\n"
"    t7 = u4 + u7;\n"
"    t4 = u4 - u7;\n"
"    u7 = t7 + t5;\n"
"    u5 = (t7 - t5)*(1.4142135623730950488016887242097f);\n"
"    u8 = (t4 + t6)*(1.8477590650225735122563663787936f);\n"
"    u4 = u8 - t4*(1.0823922002923939687994464107328f);\n"
"    u6 = u8 - t6*(2.6131259297527530557132863468544f);\n"
"    t7 = u7;\n"
"    t6 = t7 - u6;\n"
"    t5 = t6 + u5;\n"
"    t4 = t5 - u4;\n"
"\n"
"    /* Butterflies */\n"
"    u0 = t0 + t7;\n"
"    u7 = t0 - t7;\n"
"    u6 = t1 + t6;\n"
"    u1 = t1 - t6;\n"
"    u2 = t2 + t5;\n"
"    u5 = t2 - t5;\n"
"    u4 = t3 + t4;\n"
"    u3 = t3 - t4;\n"
"\n"
"    /* Output */\n"
"    btemp[COMP_ID][BLOCK_ID][0*8 + row_id] = u0;\n"
"    btemp[COMP_ID][BLOCK_ID][1*8 + row_id] = u1;\n"
"    btemp[COMP_ID][BLOCK_ID][2*8 + row_id] = u2;\n"
"    btemp[COMP_ID][BLOCK_ID][3*8 + row_id] = u3;\n"
"    btemp[COMP_ID][BLOCK_ID][4*8 + row_id] = u4;\n"
"    btemp[COMP_ID][BLOCK_ID][5*8 + row_id] = u5;\n"
"    btemp[COMP_ID][BLOCK_ID][6*8 + row_id] = u6;\n"
"    btemp[COMP_ID][BLOCK_ID][7*8 + row_id] = u7;\n"
"}\n"
"\n"
"void idct8_vert(const uint row_id)\n"
"{\n"
"    float t0, t1, t2, t3, t4, t5, t6, t7, u8;\n"
"    float u0, u1, u2, u3, u4, u5, u6, u7;\n"
"\n"
"    /* Input */\n"
"    t0 = btemp[COMP_ID][BLOCK_ID][8*row_id + 0] + 0.5f; // NOTE\n"
"    u4 = btemp[COMP_ID][BLOCK_ID][8*row_id + 1];\n"
"    t2 = btemp[COMP_ID][BLOCK_ID][8*row_id + 2];\n"
"    u6 = btemp[COMP_ID][BLOCK_ID][8*row_id + 3];\n"
"    t1 = btemp[COMP_ID][BLOCK_ID][8*row_id + 4];\n"
"    u5 = btemp[COMP_ID][BLOCK_ID][8*row_id + 5];\n"
"    t3 = btemp[COMP_ID][BLOCK_ID][8*row_id + 6];\n"
"    u7 = btemp[COMP_ID][BLOCK_ID][8*row_id + 7];\n"
"\n"
"    /* Embedded scaled inverse 4-point Type-II DCT */\n"
"    u0 = t0 + t1;\n"
"    u1 = t0 - t1;\n"
"    u3 = t2 + t3;\n"
"    u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3;\n"
"    t0 = u0 + u3;\n"
"    t3 = u0 - u3;\n"
"    t1 = u1 + u2;\n"
"    t2 = u1 - u2;\n"
"\n"
"    /* Embedded scaled inverse 4-point Type-IV DST */\n"
"    t5 = u5 + u6;\n"
"    t6 = u5 - u6;\n"
"    t7 = u4 + u7;\n"
"    t4 = u4 - u7;\n"
"    u7 = t7 + t5;\n"
"    u5 = (t7 - t5)*(1.4142135623730950488016887242097f);\n"
"    u8 = (t4 + t6)*(1.8477590650225735122563663787936f);\n"
"    u4 = u8 - t4*(1.0823922002923939687994464107328f);\n"
"    u6 = u8 - t6*(2.6131259297527530557132863468544f);\n"
"    t7 = u7;\n"
"    t6 = t7 - u6;\n"
"    t5 = t6 + u5;\n"
"    t4 = t5 - u4;\n"
"\n"
"    /* Butterflies */\n"
"    u0 = t0 + t7;\n"
"    u7 = t0 - t7;\n"
"    u6 = t1 + t6;\n"
"    u1 = t1 - t6;\n"
"    u2 = t2 + t5;\n"
"    u5 = t2 - t5;\n"
"    u4 = t3 + t4;\n"
"    u3 = t3 - t4;\n"
"\n"
"    /* Output */\n"
"    block[COMP_ID][BLOCK_ID][0*8 + row_id] = u0;\n"
"    block[COMP_ID][BLOCK_ID][1*8 + row_id] = u1;\n"
"    block[COMP_ID][BLOCK_ID][2*8 + row_id] = u2;\n"
"    block[COMP_ID][BLOCK_ID][3*8 + row_id] = u3;\n"
"    block[COMP_ID][BLOCK_ID][4*8 + row_id] = u4;\n"
"    block[COMP_ID][BLOCK_ID][5*8 + row_id] = u5;\n"
"    block[COMP_ID][BLOCK_ID][6*8 + row_id] = u6;\n"
"    block[COMP_ID][BLOCK_ID][7*8 + row_id] = u7;\n"
"}\n"
"\n"
"int16_t get_value(int16_t codebook)\n"
"{\n"
"    const int16_t switch_bits = codebook >> 8;\n"
"    const int16_t rice_order  = codebook & I16(0xf);\n"
"    const int16_t exp_order   = (codebook >> 4) & I16(0xf);\n"
"\n"
"    uint32_t b = show_bits(gb, 32);\n"
"    if (expectEXT(b == 0, false))\n"
"        return I16(0);\n"
"    int16_t q = I16(31) - I16(findMSB(b));\n"
"\n"
"    if ((b & 0x80000000) != 0) {\n"
"        skip_bits(gb, 1 + rice_order);\n"
"        return I16((b & 0x7FFFFFFF) >> (31 - rice_order));\n"
"    }\n"
"\n"
"    if (q <= switch_bits) {\n"
"        skip_bits(gb, q + rice_order + 1);\n"
"        return I16((q << rice_order) +\n"
"                   (((b << (q + 1)) >> 1) >> (31 - rice_order)));\n"
"    }\n"
"\n"
"    int16_t bits = exp_order + (q << 1) - switch_bits;\n"
"    skip_bits(gb, bits);\n"
"    return I16((b >> (32 - bits)) +\n"
"               ((switch_bits + 1) << rice_order) -\n"
"               (1 << exp_order));\n"
"}\n"
"\n"
"#define TODCCODEBOOK(x) ((x + 1) >> 1)\n"
"\n"
"void read_dc_vals(const uint nb_blocks)\n"
"{\n"
"    int16_t dc, dc_add;\n"
"    int16_t prev_dc = I16(0), sign = I16(0);\n"
"\n"
"    /* Special handling for first block */\n"
"    dc = get_value(I16(700));\n"
"    prev_dc = (dc >> 1) ^ -(dc & I16(1));\n"
"    btemp[COMP_ID][0][0] = prev_dc;\n"
"\n"
"    for (uint n = 1; n < nb_blocks; n++) {\n"
"        if (expectEXT(left_bits(gb) <= 0, false))\n"
"            break;\n"
"\n"
"        uint8_t dc_codebook;\n"
"        if ((n & 15) == 1)\n"
"            dc_codebook = uint8_t(100);\n"
"        else\n"
"            dc_codebook = dc_cb[min(TODCCODEBOOK(dc), 13 - 1)];\n"
"\n"
"        dc = get_value(dc_codebook);\n"
"\n"
"        sign = sign ^ dc & int16_t(1);\n"
"        dc_add = (-sign ^ I16(TODCCODEBOOK(dc))) + sign;\n"
"        sign = I16(dc_add < 0);\n"
"        prev_dc += dc_add;\n"
"\n"
"        btemp[COMP_ID][n][0] = prev_dc;\n"
"    }\n"
"}\n"
"\n"
"void read_ac_vals(const uint nb_blocks)\n"
"{\n"
"    const uint nb_codes = nb_blocks << 6;\n"
"    const uint log2_nb_blocks = findMSB(nb_blocks);\n"
"    const uint block_mask = (1 << log2_nb_blocks) - 1;\n"
"\n"
"    int16_t ac, rn, ln;\n"
"    int16_t ac_codebook = I16(49);\n"
"    int16_t rn_codebook = I16( 0);\n"
"    int16_t ln_codebook = I16(66);\n"
"    int16_t sign;\n"
"    int16_t val;\n"
"\n"
"    for (uint n = nb_blocks; n <= nb_codes;) {\n"
"        if (expectEXT(left_bits(gb) <= 0, false))\n"
"            break;\n"
"\n"
"        ln = get_value(ln_codebook);\n"
"        for (uint i = 0; i < ln; i++) {\n"
"            if (expectEXT(left_bits(gb) <= 0, false))\n"
"                break;\n"
"\n"
"            if (expectEXT(n >= nb_codes, false))\n"
"                break;\n"
"\n"
"            ac = get_value(ac_codebook);\n"
"            ac_codebook = ac_cb[min(ac, 95 - 1)];\n"
"            sign = -int16_t(get_bit(gb));\n"
"\n"
"            val = ((ac + I16(1)) ^ sign) - sign;\n"
"            btemp[COMP_ID][n & block_mask][n >> log2_nb_blocks] = val;\n"
"\n"
"            n++;\n"
"        }\n"
"\n"
"        if (expectEXT(n >= nb_codes, false))\n"
"            break;\n"
"\n"
"        rn = get_value(rn_codebook);\n"
"        rn_codebook = rn_cb[min(rn, 28 - 1)];\n"
"\n"
"        n += rn + 1;\n"
"        if (expectEXT(n >= nb_codes, false))\n"
"            break;\n"
"\n"
"        if (expectEXT(left_bits(gb) <= 0, false))\n"
"            break;\n"
"\n"
"        ac = get_value(ac_codebook);\n"
"        sign = -int16_t(get_bit(gb));\n"
"\n"
"        val = ((ac + I16(1)) ^ sign) - sign;\n"
"        btemp[COMP_ID][n & block_mask][n >> log2_nb_blocks] = val;\n"
"\n"
"        ac_codebook = ac_cb[min(ac, 95 - 1)];\n"
"        ln_codebook = ln_cb[min(ac, 15 - 1)];\n"
"\n"
"        n++;\n"
"    }\n"
"}\n"
"\n"
"void main(void)\n"
"{\n"
"    const uint tile_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;\n"
"    TileData td = tile_data[tile_idx];\n"
"\n"
"    if (expectEXT(td.pos.x >= frame_size.x, false))\n"
"        return;\n"
"\n"
"    uint64_t pkt_offset = uint64_t(pkt_data) + td.offset;\n"
"    u8vec2buf hdr_data = u8vec2buf(pkt_offset);\n"
"    float qscale = float(pack16(hdr_data[0].v.yx)) / 2.0f;\n"
"\n"
"    ivec4 size = ivec4(td.size,\n"
"                       pack16(hdr_data[2].v.yx),\n"
"                       pack16(hdr_data[1].v.yx),\n"
"                       pack16(hdr_data[3].v.yx));\n"
"    size[0] = size[0] - size[1] - size[2] - size[3] - 8;\n"
"    if (expectEXT(size[0] < 0, false))\n"
"        return;\n"
"\n"
"    const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1);\n"
"    const uint w = min(tile_size.x, frame_size.x - td.pos.x) / 2;\n"
"    const uint nb_blocks = w / 8;\n"
"\n"
"    const ivec4 comp_offset = ivec4(size[2] + size[1] + size[3],\n"
"                                    size[2],\n"
"                                    0,\n"
"                                    size[2] + size[1]);\n"
"\n"
"    if (BLOCK_ID == 0 && ROW_ID == 0) {\n"
"        init_get_bits(gb, u8buf(pkt_offset + 8 + comp_offset[COMP_ID]),\n"
"                      size[COMP_ID]);\n"
"        read_dc_vals(nb_blocks);\n"
"        read_ac_vals(nb_blocks);\n"
"    }\n"
"\n"
"    barrier();\n"
"\n"
"    [[unroll]]\n"
"    for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x)\n"
"        block[COMP_ID][BLOCK_ID][i] = (btemp[COMP_ID][BLOCK_ID][scan[i]] / 16384.0) *\n"
"                                      (float(qmat[i]) / 295.0) *\n"
"                                      idct_8x8_scales[i] * qscale;\n"
"\n"
"    barrier();\n"
"\n"
"#ifdef PARALLEL_ROWS\n"
"    idct8_horiz(ROW_ID);\n"
"\n"
"    barrier();\n"
"\n"
"    idct8_vert(ROW_ID);\n"
"#else\n"
"    for (uint j = 0; j < 8; j++)\n"
"        idct8_horiz(j);\n"
"\n"
"    barrier();\n"
"\n"
"    for (uint j = 0; j < 8; j++)\n"
"        idct8_vert(j);\n"
"#endif\n"
"\n"
"    barrier();\n"
"\n"
"    [[unroll]]\n"
"    for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x)\n"
"         imageStore(dst,\n"
"                    offs + 2*ivec2(BLOCK_ID*8 + (i & 7), i >> 3),\n"
"                    vec4(block[COMP_ID][BLOCK_ID][i]));\n"
"}\n"
;
