Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

How to convert LDS memory address to the address can be passed into DS_READ_* and DS_WRITE_* instructions? #1455

Open
terU3760 opened this issue Mar 31, 2022 · 2 comments

Comments

@terU3760
Copy link

terU3760 commented Mar 31, 2022

Have followed here to write the following code:
The hip file:

#include "hip/hip_runtime.h"
#include "hip/hcc_detail/device_library_decls.h"

__global__ void halfVec_v_pk_sts_then_lds( uint16_t * dst , uint32_t * ptr , uint16_t * val , int n )
{

    __shared__ uint16_t temp_local_share_memory_allocated[5];

    int id = blockIdx.x * blockDim.x + threadIdx.x;

    if ( id < n )
    {

        asm volatile("DS_WRITE_B16 %0, %1;\nS_WAITCNT lgkmcnt(0);\n" : : "v"( __to_local( &temp_local_share_memory_allocated[ptr[id]] ) ), "v"(val[id]) );
        printf("Store value: %d into local shared memory address: %d.\n", val , ptr[id] );

        asm volatile("DS_READ_U16 %0, %1;\nS_WAITCNT lgkmcnt(0);\n" : "=v"(dst[id]) : "v"( __to_local( &temp_local_share_memory_allocated[ptr[id]] ) ));
        printf("Load value: %d from local shared memory address: %d.\n", dst[id] , ptr[id] );

    }

}

The cpp file:

#pragma once

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#include "test_simple_kernel.hip"

int main(int argc,char**vargs)
{

    uint16_t* to_be_stored = new uint16_t[1];
    uint16_t* to_be_loaded = new uint16_t[1];
    uint32_t* local_share_memory_address = new uint32_t[1];

    uint16_t* to_be_stored_d;
    uint16_t* to_be_loaded_d;
    uint32_t* local_share_memory_address_d;

    to_be_stored[ 0 ] = 1;
    local_share_memory_address[ 0 ] = 0;
    hipSetDevice(0);
    hipMalloc(&to_be_stored_d, 2);
    hipMalloc(&to_be_loaded_d, 2);
    hipMalloc(&local_share_memory_address_d, 4);
    hipMemcpy(to_be_stored_d, to_be_stored, 2, hipMemcpyHostToDevice);
    hipMemcpy(local_share_memory_address_d, local_share_memory_address, 4, hipMemcpyHostToDevice);
    printf("Stored value: %d at local share memory address: %d.\n" , to_be_stored[ 0 ] , local_share_memory_address[ 0 ] );
    hipLaunchKernelGGL( halfVec_v_pk_sts_then_lds , dim3( 1 ) , dim3( 1 ) , sizeof(uint16_t), 0, (uint16_t*)(to_be_loaded_d), (uint32_t*)(local_share_memory_address_d) , (uint16_t*)(to_be_stored_d) , 1 );
    hipMemcpy(to_be_loaded, to_be_loaded_d, 2, hipMemcpyDeviceToHost);
    hipDeviceSynchronize();
    printf("And then load value: %d from local share memory address: %d.\n", to_be_loaded[ 0 ] , local_share_memory_address[ 0 ] );

}

The code just doesn't compile! How to correct it? Thanks in advance!

@b-sumner
Copy link

b-sumner commented Mar 31, 2022

Accessing __shared__ memory via inline ASM is not recommended and will probably cause more harm than good. What problem are you trying to solve? Why not simply code temp_local...[i]?

@terU3760
Copy link
Author

terU3760 commented Apr 1, 2022

@b-sumner I'm trying to test and let the inline assembly working. How to make this program(inline assembly) workable? Or could you please provide a program architecture to let these inline assembly workable?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants