Source code for lyceanem.raycasting.rayfunctions

# -*- coding: utf-8 -*-
import copy
import math
from timeit import default_timer as timer

import meshio
import numba as nb
import numpy as np
import scipy.stats
from matplotlib import cm
from numba import cuda, float32, jit, njit, guvectorize, prange
from scipy.spatial import distance

import lyceanem.base_types as base_types
import lyceanem.electromagnetics.empropagation as EM
import lyceanem.geometry.targets as TG
from ..utility import math_functions as math_functions

EPSILON = 1e-7  # how close to zero do we consider zero? example used 1e-7


@cuda.jit(device=True)
def dot(ax1, ay1, az1, ax2, ay2, az2):
    result = ax1 * ax2 + ay1 * ay2 + az1 * az2
    return result


@cuda.jit(device=True)
def cross(ax1, ay1, az1, ax2, ay2, az2):
    rx = ay1 * az2 - az1 * ay2
    ry = az1 * ax2 - ax1 * az2
    rz = ax1 * ay2 - ay1 * ax2
    return rx, ry, rz


# @cuda.jit(boolean,float32(ray_t, triangle_t), device=True, inline=False)
@cuda.jit(device=True, inline=False)
def hit(ray, triangle):
    """Compute compute whether the defined ray will intersect with the defined triangle
    using the  Möller–Trumbore ray-triangle intersection algorithm
    """
    # find edge vectors
    e1x = triangle.v1x - triangle.v0x
    e1y = triangle.v1y - triangle.v0y
    e1z = triangle.v1z - triangle.v0z

    e2x = triangle.v2x - triangle.v0x
    e2y = triangle.v2y - triangle.v0y
    e2z = triangle.v2z - triangle.v0z
    # calculate determinant cross product of DV and E2
    pvecx, pvecy, pvecz = cross(ray.dx, ray.dy, ray.dz, e2x, e2y, e2z)
    # pvecx=ray.dy*e2z-ray.dz*e2y
    # pvecy=ray.dz*e2x-ray.dx*e2z
    # pvecz=ray.dx*e2y-ray.dy*e2x
    # determinant is dot product of edge 1 and pvec
    A = dot(pvecx, pvecy, pvecz, e1x, e1y, e1z)
    # if A is near zero, then ray lies in the plane of the triangle
    if -EPSILON < A < EPSILON:
        # print('miss')
        return False, math.inf

    # if A is less than zero, then the ray is coming from behind the triangle
    # don't cull backface triangles
    # if A < 0:
    #    return False, math.inf
    # calculate distance from vertice 0 to ray origin
    tvecx = ray.ox - triangle.v0x  # s
    tvecy = ray.oy - triangle.v0y  # s
    tvecz = ray.oz - triangle.v0z  # s
    # inverse determinant and calculate bounds of triangle
    F = 1.0 / A
    U = F * dot(tvecx, tvecy, tvecz, pvecx, pvecy, pvecz)
    # U=F*(tvecx*pvecx+tvecy*pvecy+tvecz*pvecz)
    # print('U',U)
    if U < 0.0 or U > (1.0):
        # in U coordinates, if U is less than 0 or greater than 1, it is outside the triangle
        # print('miss')
        return False, math.inf

    # cross product of tvec and E1
    qx, qy, qz = cross(tvecx, tvecy, tvecz, e1x, e1y, e1z)
    # qx=tvecy*e1z-tvecz*e1y
    # qy=tvecz*e1x-tvecx*e1z
    # qz=tvecx*e1y-tvecy*e1x
    V = F * dot(ray.dx, ray.dy, ray.dz, qx, qy, qz)
    # print('V,V+U',V,V+U)
    # V=F*(ray.dx*qx+ray.dy*qy+ray.dz*qz)
    if V < 0.0 or (U + V) > (1.0):
        # in UV coordinates, intersection is within triangle
        # print('miss')
        return False, math.inf

    # intersect_distance = F*(e2x*qx + e2y*qy + e2z*qz)
    intersect_distance = F * dot(e2x, e2y, e2z, qx, qy, qz)
    # print('dist',intersect_distance)
    # if (intersect_distance>EPSILON and intersect_distance<(1-EPSILON)):
    if (intersect_distance > (2 * EPSILON)) and (
        intersect_distance < (ray.dist - (2 * EPSILON))
    ):
        # intersection on triangle
        # print('hit')
        return True, intersect_distance

    return False, math.inf


@cuda.jit
def integratedkernal(ray_index, point_information, environment, ray_flag):
    cu_ray_num = cuda.grid(1)  # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),

    if cu_ray_num < ray_index.shape[0]:
        # there are rays to process but must create and launch on the spot, then if succesful go to the next one until the index has been traversed
        # print(ray_index[cu_ray_num,0],ray_index[cu_ray_num,1],ray_index[cu_ray_num,2])
        # ray_components[cu_ray_num,:]=0.0
        # print(scattering_matrix.shape[0],scattering_matrix.shape[1])
        for i in range(ray_index.shape[1] - 1):
            # print('integrated ray',cu_ray_num)
            if ray_index[cu_ray_num, i + 1] != 0:
                #     #print(i,cu_ray_num,network_index[cu_ray_num,i],network_index[cu_ray_num,i+1])
                #     #convert source point field to ray

                ray = cuda.local.array(shape=(1), dtype=base_types.ray_t)
                point1 = point_information[ray_index[cu_ray_num, i] - 1]
                point2 = point_information[ray_index[cu_ray_num, i + 1] - 1]
                ray[0]["ox"] = point1["px"]
                ray[0]["oy"] = point1["py"]
                ray[0]["oz"] = point1["pz"]
                normconst = math.sqrt(
                    (point2["px"] - point1["px"]) ** 2
                    + (point2["py"] - point1["py"]) ** 2
                    + (point2["pz"] - point1["pz"]) ** 2
                )
                ray[0]["dx"] = (point2["px"] - point1["px"]) / normconst
                ray[0]["dy"] = (point2["py"] - point1["py"]) / normconst
                ray[0]["dz"] = (point2["pz"] - point1["pz"]) / normconst
                ray[0]["dist"] = normconst
                ray[0]["intersect"] = False
                # ray=rayprep(point1,point2,ray)
                # print('integrated ray',cu_ray_num,i,normconst)
                for tri_inc in range(len(environment)):
                    hit_bool, dist_temp = hit(ray[0], environment[tri_inc])
                    if hit_bool and (dist_temp < ray[0]["dist"]):
                        # hit something, so break out of loop and null index the first entry of the ray_index
                        ray[0]["dist"] = dist_temp
                        ray[0]["intersect"] = True

            # if ray[0]['intersect']:
            #    break

        ray_flag[cu_ray_num] = ray[0]["intersect"]
        # print('integrated',cu_ray_num,i)


@cuda.jit
def kernel1D(rays, environment, ray_flag, distances):
    cu_ray_num = cuda.grid(1)  # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),
    #           threadIdx.y + ( blockIdx.y * blockDim.y )
    # margin=1e-5
    dist_min = math.inf
    intersect_check = False
    max_tri_num = len(environment)
    i = 0  # emulate a C-style for-loop, exposing the idx increment logic
    while i < max_tri_num:
        hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
        i += 1
        if hit_bool and (dist_temp < dist_min):
            # hit something
            intersect_check = True
            dist_min = dist_temp

    ray_flag[cu_ray_num] = intersect_check
    distances[cu_ray_num] = dist_min


@cuda.jit
def kernel1Dv2(rays, environment):
    cu_ray_num = cuda.grid(1)  # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),
    #           threadIdx.y + ( blockIdx.y * blockDim.y )
    # margin=1e-5
    # dist_min=math.inf
    # intersect_check=False
    max_tri_num = len(environment)
    i = 0  # emulate a C-style for-loop, exposing the idx increment logic
    while i < max_tri_num:
        hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
        i += 1
        # if hit_bool:
        #    print(cu_ray_num)
        # if (cu_ray_num==5):
        #    #if (i==58):
        #    hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
        #    print(i,dist_temp)

        if hit_bool and (dist_temp < rays[cu_ray_num]["dist"]):
            # hit something
            # print(cu_ray_num,i,dist_temp,'hit')
            # dist_min=dist_temp
            rays[cu_ray_num]["dist"] = dist_temp
            rays[cu_ray_num]["intersect"] = True
            # ray_flag[cu_ray_num]=True

    # if rays[cu_ray_num]['intersect']==False:
    #    print(cu_ray_num,i,dist_temp,'miss')

    nb.cuda.syncthreads()
    # if (rays[cu_ray_num]['intersect']==True):
    #    print(cu_ray_num,rays[cu_ray_num]['dist'])
    # if (cu_ray_num==1):
    #    print(i)


@cuda.jit
def kernel1Dv3(rays, environment):
    cu_ray_num = cuda.grid(1)  # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),

    if (
        cu_ray_num < rays.shape[0]
    ):  #           threadIdx.y + ( blockIdx.y * blockDim.y )
        # margin=1e-5
        # dist_min=math.inf
        # intersect_check=False
        max_tri_num = len(environment)
        i = 0  # emulate a C-style for-loop, exposing the idx increment logic
        while i < max_tri_num:
            hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
            i += 1
            # if hit_bool:
            #    print(cu_ray_num)
            # if (cu_ray_num==5):
            #    #if (i==58):
            #    hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
            #    print(i,dist_temp)
            if hit_bool and (dist_temp < rays[cu_ray_num]["dist"]):
                # hit something
                # print(cu_ray_num,i,dist_temp,'hit')
                # dist_min=dist_temp
                # print('conventional',cu_ray_num,i,dist_temp,rays[cu_ray_num]['dist'])
                rays[cu_ray_num]["dist"] = dist_temp
                rays[cu_ray_num]["intersect"] = True

                # ray_flag[cu_ray_num]=True

        # print('conventional',cu_ray_num,dist_temp)
        nb.cuda.syncthreads()


def integratedRaycaster(ray_index, scattering_points, environment_local):
    start = timer()

    prep_dt = timer() - start
    raystart = timer()
    # Create a container for the pixel RGBA information of our image
    chunk_size = 2**11
    threads_in_block = 1024
    # for idx in range(len(triangle_chunk)-1):
    d_environment = cuda.device_array(
        len(environment_local), dtype=base_types.triangle_t
    )
    d_ray_index = cuda.device_array(
        (ray_index.shape[0], ray_index.shape[1]), dtype=np.int32
    )
    d_ray_index = cuda.to_device(ray_index)
    ray_flags = np.full(ray_index.shape[0], False, dtype=bool)
    d_ray_flag = cuda.device_array(ray_index.shape[0], dtype=bool)
    d_ray_flag = cuda.to_device(ray_flags)
    cuda.to_device(environment_local, to=d_environment)
    d_point_information = cuda.device_array(
        scattering_points.shape[0], dtype=base_types.scattering_t
    )
    d_point_information = cuda.to_device(scattering_points)
    grids = math.ceil(ray_index.shape[0] / threads_in_block)
    threads = threads_in_block
    # Execute the kernel
    # cuda.profile_start()
    # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
    integratedkernal[grids, threads](
        d_ray_index, d_point_information, d_environment, d_ray_flag
    )
    # cuda.profile_stop()
    # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
    propagation_index = d_ray_index.copy_to_host()
    ray_flags = d_ray_flag.copy_to_host()
    kernel_dt = timer() - raystart
    start = timer()

    mem_dt = timer() - start
    # deallocate memory on gpu
    # ctx = cuda.current_context()
    # deallocs = ctx.deallocations
    # deallocs.clear()
    # final_index=np.delete(propagation_index,np.where(propagation_index[:,0]==0),axis=0)
    final_index = np.delete(ray_index, ray_flags, axis=0)

    return final_index


# @njit
def patterntocloud(pattern_data, shell_coords, maxarea):
    # takes the pattern_data and shell_coordinates, and creates a point cloud based upon the data.
    import lyceanem.utility.mesh_functions as MF

    point_cloud = MF.points2pointcloud(shell_coords)
    points, elements = np.shape(pattern_data)
    # normdata
    # viridis = cm.get_cmap("viridis", 40)
    visible_elements = np.sum(pattern_data / maxarea, axis=1)
    # np_colors = viridis(visible_elements)
    # point_cloud.point_data["red"] = np_colors[:, 0]
    # point_cloud.point_data["green"] = np_colors[:, 1]
    # point_cloud.point_data["blue"] = np_colors[:, 2]
    point_cloud.point_data["Projected_Area"] = pattern_data

    return point_cloud


[docs] def visiblespace( source_coords, source_normals, environment, vertex_area=0, az_range=None, elev_range=None, shell_range=0.5, ): """ Visiblespace generates a matrix stack of visible space for each element, indexed by source_coordinate. Parameters -------------- source_coords : numpy.ndarray of float xyz coordinates of the sources with shape (n,3) source_normals : numpy.ndarray of float normal vectors for each source point with shape (n,3) environment : :class:`lyceanem.base_classes.triangles` blocking environment vertex_area : numpy.ndarray of float the area associated with each source point, defaults to 0, but can also be specified for each source az_range : numpy.ndarray of float array of azimuth planes in degrees elev_range : numpy.ndarray of float array of elevation points in degrees shell_range: float radius of point cloud shell Returns ------------------- visible_patterns : numpy.ndarray of float 3D antenna patterns resultant_pcd : :type:`meshio.Mesh` colour data to scale the points fractional visibility from the source aperture """ if az_range is None: az_range = np.linspace(-180.0, 180.0, 19) if elev_range is None: elev_range = np.linspace(-90.0, 90.0, 19) azaz, elel = np.meshgrid(az_range, elev_range) sourcenum = source_coords.points.shape[0] sample_shell = TG.spherical_field( az_range, elev_range, outward_normals=True, field_radius=shell_range ) sinks = sample_shell.points sinknum = len(sinks) # # initial_index=np.full((len(source_coords),2),np.nan,dtype=np.int32) # initial_index[:,0]=np.arange(len(source_coords)) # pointingindex=np.reshape(np.arange(0,len(sinks)),(len(az_range),len(elev_range))) # need to create farfield sinks in az,elev coordinates, then convert to xyz sink coordinates, and generate index # missed_points,hit_points,missed_index,hit_index,shadow_rays=chunkingRaycaster1D(source_coords,sinks,np.zeros((1,3),dtype=np.float32),initial_index,environment,1,terminate_flag=True) hit_index, _ = workchunkingv2( source_coords.points, sinks, np.empty((0, 3), dtype=np.float32), environment, 1 ) unified_model = np.append( source_coords.points.astype(np.float32), sinks.astype(np.float32), axis=0 ) # filtered_network2,final_network2,filtered_index2,final_index2,shadow_rays directions = np.zeros((len(hit_index), 3), dtype=np.float32) norm_length = np.zeros((len(hit_index), 1), dtype=np.float32) hit_directions, hit_norms = math_functions.calc_dv_norm( unified_model[hit_index[:, 0].astype(int) - 1, :], unified_model[hit_index[:, 1].astype(int) - 1, :], directions, norm_length, ) # angles=Angular_distance(hit_directions,source_normals[hit_index[:,0].astype(int)-1,:].astype(np.float32)) angles = np.zeros((hit_directions.shape[0], 1), dtype=np.float32) angles = angle_pop( hit_directions, source_normals[hit_index[:, 0].astype(int) - 1, :].astype(np.float32), angles, ) # angles[np.isnan(angles)]=0 vertex_area[np.isnan(vertex_area)] = 0 # visible_patterns=quickpatterncreator(az_range,elev_range,source_coords,angles,vertex_area,hit_index) if len(vertex_area) == 1: if vertex_area == 0: portion = np.zeros((len(angles), 1), dtype=np.float32) portion[:] = 1 else: portion = vertex_area * np.abs(np.cos(angles)) portion[portion < 0] = 0 else: portion = np.ravel(vertex_area[hit_index[:, 0].astype(int) - 1]) * np.abs( np.ravel(np.cos(angles)) ) portion[np.where(np.abs(angles) > (np.pi / 2))[0]] = 0.0 visible_patterns = np.empty((len(az_range) * len(elev_range)), dtype=np.float32) visible_patterns[:] = 0 visible_patterns = patternsort( visible_patterns, sourcenum, sinknum, portion, hit_index ).reshape(len(elev_range), len(az_range)) # shell_coords = np.zeros((len(np.ravel(azaz)), 3), dtype=np.float32) # shell_coords[:, 0], shell_coords[:, 1], shell_coords[:, 2] = azeltocart( # np.ravel(azaz), np.ravel(elel), shell_range # ) maxarea = np.nanmax(np.nanmax(visible_patterns)) # resultant_pcd = patterntocloud( # np.reshape(visible_patterns, (len(az_range) * len(elev_range), 1)), # shell_coords, # maxarea, # ) sample_shell.point_data["Projected_Area"] = visible_patterns.reshape(-1, 1) return visible_patterns, sample_shell
@njit(cache=True, nogil=True) def unit_vector(vector): """Returns the unit vector of the vector.""" return vector / np.linalg.norm(vector) @njit(cache=True, nogil=True) def angle(vector1, vector2): """Returns the angle in radians between given vectors""" v1_u = unit_vector(vector1) v2_u = unit_vector(vector2) minor = np.linalg.det(np.stack((v1_u[-2:], v2_u[-2:]))) if minor == 0: sign = 1 else: sign = -np.sign(minor) dot_p = np.dot(v1_u, v2_u) dot_p = min(max(dot_p, -1.0), 1.0) return sign * np.arccos(dot_p) @njit(parallel=True) def angle_pop(hit_directions, source_normals, angles): # parallised angle calculation for angle_inc in prange(len(angles)): angles[angle_inc] = angle( hit_directions[angle_inc, :], source_normals[angle_inc, :] ) return angles @njit(parallel=True) def patternsort(visible_patterns, sourcenum, sinknum, portion, hit_index): # faster method of summing the array contributions to each farfield point for n in range(sinknum): visible_patterns[n] = np.sum(portion[hit_index[:, 1] - 1 - sourcenum == n]) return visible_patterns def patterncreator(az_range, elev_range, source_coords, pointingindex, hit_index): # creates visibility pattern from azimuth and elevation ranges visible_patterns = np.empty( (len(az_range), len(elev_range), len(source_coords)), dtype=np.int32 ) visible_patterns[:, :, :] = 0 for n in range(len(az_range)): for m in range(len(elev_range)): for element in range(len(source_coords)): if np.any( np.all( np.asarray( [ hit_index[:, 0] == element, hit_index[:, 1] == pointingindex[n, m], ] ), axis=0, ) ): visible_patterns[n, m, element] = 1 return visible_patterns def quickpatterncreator( az_range, elev_range, source_coords, angles, vertex_area, hit_index ): # creates visibility pattern from azimuth and elevation ranges visible_patterns = np.empty((len(az_range), len(elev_range)), dtype=np.float32) visible_patterns[:, :] = 0 pointingindex = np.reshape( np.arange(0, len(az_range) * len(elev_range)), (len(az_range), len(elev_range)) ) sourcenum = len(source_coords) if vertex_area == 0: for n in range(len(az_range)): for m in range(len(elev_range)): visible_patterns[n, m] = np.sum( hit_index[:, 1] - sourcenum - 1 == pointingindex[n, m] ) else: portion = vertex_area * np.cos(angles) portion[portion < 0] = 0 for n in range(len(az_range)): for m in range(len(elev_range)): visible_patterns[n, m] = np.sum( portion[hit_index[:, 1] - sourcenum - 1 == pointingindex[n, m]] ) return visible_patterns def azeltocart(az_data, el_data, radius): # convert from az,el and radius data to xyz x_data = radius * np.cos(np.deg2rad(el_data)) * np.cos(np.deg2rad(az_data)) y_data = radius * np.cos(np.deg2rad(el_data)) * np.sin(np.deg2rad(az_data)) z_data = radius * np.sin(np.deg2rad(el_data)) return x_data, y_data, z_data def convertTriangles(triangle_object): """ convert meshio.Mesh triangle object to ray tracer triangle class """ if triangle_object is None: triangles = np.empty(0, dtype=base_types.triangle_t) else: # assert triangle_object.cells[1].type == "triangle", "Not a triangle mesh" for idx in range(len(triangle_object.cells)): if triangle_object.cells[idx].type == "triangle": triangle_cell_index = idx vertices = np.asarray(triangle_object.points) tri_index = np.asarray(triangle_object.cells[triangle_cell_index].data) triangles = np.empty(len(tri_index), dtype=base_types.triangle_t) for idx in range(len(tri_index)): triangles[idx]["v0x"] = np.single(vertices[tri_index[idx, 0], 0]) triangles[idx]["v0y"] = np.single(vertices[tri_index[idx, 0], 1]) triangles[idx]["v0z"] = np.single(vertices[tri_index[idx, 0], 2]) triangles[idx]["v1x"] = np.single(vertices[tri_index[idx, 1], 0]) triangles[idx]["v1y"] = np.single(vertices[tri_index[idx, 1], 1]) triangles[idx]["v1z"] = np.single(vertices[tri_index[idx, 1], 2]) triangles[idx]["v2x"] = np.single(vertices[tri_index[idx, 2], 0]) triangles[idx]["v2y"] = np.single(vertices[tri_index[idx, 2], 1]) triangles[idx]["v2z"] = np.single(vertices[tri_index[idx, 2], 2]) return triangles @guvectorize([(float32[:, :], float32[:])], "(m,n)->(m)", target="parallel") def PathLength(partial_network, path_length): """ for each row in the partial_network, calculate the distance travelled from the origin to the sink point # Parameters ---------- partial_network : n by (3*m) array of coordinates # Returns ------- path_lengths : n by 1 array of path lengths between each point. # """ if partial_network.shape[1] == 6: for i in range(partial_network.shape[0]): path_length[i] = np.sqrt( (partial_network[i, 3] - partial_network[i, 0]) ** 2 + (partial_network[i, 4] - partial_network[i, 1]) ** 2 + (partial_network[i, 5] - partial_network[i, 2]) ** 2 ) elif partial_network.shape[1] == 9: for i in range(partial_network.shape[0]): path_length[i] = np.sqrt( (partial_network[i, 3] - partial_network[i, 0]) ** 2 + (partial_network[i, 4] - partial_network[i, 1]) ** 2 + (partial_network[i, 5] - partial_network[i, 2]) ** 2 ) + np.sqrt( (partial_network[i, 6] - partial_network[i, 3]) ** 2 + (partial_network[i, 7] - partial_network[i, 4]) ** 2 + (partial_network[i, 8] - partial_network[i, 5]) ** 2 ) elif partial_network.shape[1] == 12: for i in range(partial_network.shape[0]): path_length[i] = ( np.sqrt( (partial_network[i, 3] - partial_network[i, 0]) ** 2 + (partial_network[i, 4] - partial_network[i, 1]) ** 2 + (partial_network[i, 5] - partial_network[i, 2]) ** 2 ) + np.sqrt( (partial_network[i, 6] - partial_network[i, 3]) ** 2 + (partial_network[i, 7] - partial_network[i, 4]) ** 2 + (partial_network[i, 8] - partial_network[i, 5]) ** 2 ) + np.sqrt( (partial_network[i, 9] - partial_network[i, 6]) ** 2 + (partial_network[i, 10] - partial_network[i, 7]) ** 2 + (partial_network[i, 11] - partial_network[i, 8]) ** 2 ) ) # @guvectorize([(int32[:,:], float32[:,:], float32[:,:], float32[:,:], float32, complex64[:,:])], '(m,n)->(m,n)',target='parallel') @njit(parallel=True, nogil=True, cache=True) def ScatteringNetworkGenerator( network_index, scattering_points, scattering_normals, scattering_weights, point_information, scattering_coefficient, wavelength, local_channel, ): """ for each row in the partial_network, calculate the distance travelled from the origin to the sink point # Parameters ---------- partial_network : n by (3*m) array of coordinates # Returns ------- local_channel : n by 3 array of electric field vectors at each sink # """ if network_index.shape[1] == 2: for i in prange(network_index.shape[0]): local_channel[i, :] = EM.losChannel( scattering_points[network_index[i, 0] - 1, :], scattering_normals[network_index[i, 0] - 1, :], scattering_weights[network_index[i, 0] - 1, :], point_information[network_index[i, 0] - 1], scattering_points[network_index[i, 1] - 1, :], scattering_normals[network_index[i, 1] - 1, :], scattering_weights[network_index[i, 1] - 1, :], point_information[network_index[i, 1] - 1], scattering_coefficient, wavelength, ) elif network_index.shape[1] == 3: for i in prange(network_index.shape[0]): local_channel[i, :] = EM.losplus1Channel( scattering_points[network_index[i, 0] - 1, :], scattering_normals[network_index[i, 0] - 1, :], scattering_weights[network_index[i, 0] - 1, :], point_information[network_index[i, 0] - 1], scattering_points[network_index[i, 1] - 1, :], scattering_normals[network_index[i, 1] - 1, :], scattering_weights[network_index[i, 1] - 1, :], point_information[network_index[i, 1] - 1], scattering_points[network_index[i, 2] - 1, :], scattering_normals[network_index[i, 2] - 1, :], scattering_weights[network_index[i, 2] - 1, :], point_information[network_index[i, 2] - 1], scattering_coefficient, wavelength, ) elif network_index.shape[1] == 4: for i in prange(network_index.shape[0]): local_channel[i, :] = EM.losplus2Channel( scattering_points[network_index[i, 0] - 1, :], scattering_normals[network_index[i, 0] - 1, :], scattering_weights[network_index[i, 0] - 1, :], point_information[network_index[i, 0] - 1], scattering_points[network_index[i, 1] - 1, :], scattering_normals[network_index[i, 1] - 1, :], scattering_weights[network_index[i, 1] - 1, :], point_information[network_index[i, 1] - 1], scattering_points[network_index[i, 2] - 1, :], scattering_normals[network_index[i, 2] - 1, :], scattering_weights[network_index[i, 2] - 1, :], point_information[network_index[i, 2] - 1], scattering_points[network_index[i, 3] - 1, :], scattering_normals[network_index[i, 3] - 1, :], scattering_weights[network_index[i, 3] - 1, :], point_information[network_index[i, 3] - 1], scattering_coefficient, wavelength, ) return local_channel @njit(parallel=True, nogil=True, cache=True) def ScatteringNetworkGeneratorTime( network_index, scattering_points, scattering_normals, scattering_weights, point_information, wavelength, local_channel, times, ): """ for each row in the partial_network, calculate the distance travelled from the origin to the sink point # Parameters ---------- partial_network : n by (3*m) array of coordinates # Returns ------- local_channel : n by 3 array of electric field vectors at each sink # """ if network_index.shape[1] == 2: for i in prange(network_index.shape[0]): local_channel[i, :], times[i] = EM.losChannelv2( scattering_points[network_index[i, 0] - 1, :], scattering_normals[network_index[i, 0] - 1, :], scattering_weights[network_index[i, 0] - 1, :], point_information[network_index[i, 0] - 1], scattering_points[network_index[i, 1] - 1, :], scattering_normals[network_index[i, 1] - 1, :], scattering_weights[network_index[i, 1] - 1, :], point_information[network_index[i, 1] - 1], wavelength, ) elif network_index.shape[1] == 3: for i in prange(network_index.shape[0]): local_channel[i, :], times[i] = EM.losplus1Channelv2( scattering_points[network_index[i, 0] - 1, :], scattering_normals[network_index[i, 0] - 1, :], scattering_weights[network_index[i, 0] - 1, :], point_information[network_index[i, 0] - 1], scattering_points[network_index[i, 1] - 1, :], scattering_normals[network_index[i, 1] - 1, :], scattering_weights[network_index[i, 1] - 1, :], point_information[network_index[i, 1] - 1], scattering_points[network_index[i, 2] - 1, :], scattering_normals[network_index[i, 2] - 1, :], scattering_weights[network_index[i, 2] - 1, :], point_information[network_index[i, 2] - 1], wavelength, ) elif network_index.shape[1] == 4: for i in prange(network_index.shape[0]): local_channel[i, :], times[i] = EM.losplus2Channelv2( scattering_points[network_index[i, 0] - 1, :], scattering_normals[network_index[i, 0] - 1, :], scattering_weights[network_index[i, 0] - 1, :], point_information[network_index[i, 0] - 1], scattering_points[network_index[i, 1] - 1, :], scattering_normals[network_index[i, 1] - 1, :], scattering_weights[network_index[i, 1] - 1, :], point_information[network_index[i, 1] - 1], scattering_points[network_index[i, 2] - 1, :], scattering_normals[network_index[i, 2] - 1, :], scattering_weights[network_index[i, 2] - 1, :], point_information[network_index[i, 2] - 1], scattering_points[network_index[i, 3] - 1, :], scattering_normals[network_index[i, 3] - 1, :], scattering_weights[network_index[i, 3] - 1, :], point_information[network_index[i, 3] - 1], wavelength, ) return local_channel, times @njit(parallel=True) def ScatteringNetworkGeneratorv2( network_index, scattering_points, scattering_normals, scattering_weights, point_information, wavelength, local_channel, ): """ for each row in the partial_network, calculate the distance travelled from the origin to the sink point # Parameters ---------- partial_network : n by (3*m) array of coordinates # Returns ------- local_channel : n by 3 array of electric field vectors at each sink # """ if network_index.shape[1] == 2: local_channel = EM.losChannel( scattering_points[network_index[0] - 1], scattering_normals[network_index[0] - 1], scattering_weights[network_index[0] - 1], point_information[network_index[0] - 1], scattering_points[network_index[1] - 1], scattering_normals[network_index[1] - 1], scattering_weights[network_index[1] - 1], point_information[network_index[1] - 1], wavelength, ) elif network_index.shape[1] == 3: local_channel = EM.losplus1Channel( scattering_points[network_index[0] - 1], scattering_normals[network_index[0] - 1], scattering_weights[network_index[0] - 1], point_information[network_index[0] - 1], scattering_points[network_index[1] - 1], scattering_normals[network_index[1] - 1], scattering_weights[network_index[1] - 1], point_information[network_index[1] - 1], scattering_points[network_index[2] - 1], scattering_normals[network_index[2] - 1], scattering_weights[network_index[2] - 1], point_information[network_index[2] - 1], wavelength, ) elif network_index.shape[1] == 4: local_channel = EM.losplus2Channel( scattering_points[network_index[0] - 1], scattering_normals[network_index[0] - 1], scattering_weights[network_index[0] - 1], point_information[network_index[0] - 1], scattering_points[network_index[1] - 1], scattering_normals[network_index[1] - 1], scattering_weights[network_index[1] - 1], point_information[network_index[1] - 1], scattering_points[network_index[2] - 1], scattering_normals[network_index[2] - 1], scattering_weights[network_index[2] - 1], point_information[network_index[2] - 1], scattering_points[network_index[3] - 1], scattering_normals[network_index[3] - 1], scattering_weights[network_index[3] - 1], point_information[network_index[3] - 1], wavelength, ) return local_channel def PoyntingVector(partial_network): """ for each row in the partial_network, calculate the distance travelled from the origin to the sink point # Parameters ---------- partial_network : n by 6 array of coordinates # Returns ------- pointing_vectors : n by 3 array of path lengths between each point. # """ pointing_vectors = np.zeros((partial_network.shape[0], 3), dtype=np.float32) pointing_mags = np.zeros((partial_network.shape[0], 1), dtype=np.float32) for i in range(partial_network.shape[0]): pointing_vectors[i, :] = partial_network[i, -3:] - partial_network[i, -6:-3] pointing_mags[i] = np.sqrt( pointing_vectors[i, 0] ** 2 + pointing_vectors[i, 1] ** 2 + pointing_vectors[i, 2] ** 2 ) pointing_vectors = pointing_vectors / pointing_mags return pointing_vectors def AngleofArrivalVectors(partial_network): """ calculate the vector from the receiving point to the incoming ray, to calculate angle of arrival correctly # Parameters ---------- partial_network : n by 6 array of coordinates # Returns ------- pointing_vectors : n by 3 array of path lengths between each point. # """ pointing_vectors = np.zeros((partial_network.shape[0], 3), dtype=np.float32) pointing_mags = np.zeros((partial_network.shape[0], 1), dtype=np.float32) for i in range(partial_network.shape[0]): pointing_vectors[i, :] = partial_network[i, -6:-3] - partial_network[i, -3:] pointing_mags[i] = np.sqrt( pointing_vectors[i, 0] ** 2 + pointing_vectors[i, 1] ** 2 + pointing_vectors[i, 2] ** 2 ) pointing_vectors = pointing_vectors / pointing_mags return pointing_vectors def CalculatePoyntingVectors( total_network, wavelength, scattering_index, ideal_vector=np.asarray([[1, 0, 0]]), az_bins=np.linspace(-np.pi, np.pi, 19), el_bins=np.linspace(-np.pi / 2.0, np.pi / 2.0, 19), time_bins=np.linspace(-1e-9, 1.9e-8, 20), impulse=False, aoa=True, ): """ Takes the total network generated using the raycasting process, and calculates the angle of arrival spectrum, delay spectrum, and angular standard deviation as a measure of `farfield ness' of the arriving waves' Parameters ---------- total_network : array of n*(m*3) tuples the coordinates of each interaction point wavelength : float wavelength of interest, currently a single value (SI units) scattering_index : n*2 array of integers the source and sink index of each ray ideal_vector : 1*3 array the direction vector of the `ideal' incoming ray time bins : 1d numpy array of floats default is 20 bins, but this should always be set by the user to a sampling rate sufficent for twice the expected highest frequency Returns ------- aoa_spectrum : array of size sinknum*len(theta_bins) angle of arrival spectrum impulse_response : travel_times : """ max_path_divergence = 2.0 if len(total_network) == 0: sourcenum = np.nanmax(scattering_index[:, 0]).astype(int) + 1 sinknum = np.nanmax(scattering_index[:, 1]).astype(int) + 1 aoa_spectrum = np.zeros((sinknum, az_bins.shape[0]), dtype=np.float32) delay_spectrum = np.zeros((sinknum, az_bins.shape[0]), dtype=np.float32) else: wave_vector = (2 * np.pi) / wavelength sourcenum = np.nanmax(scattering_index[:, 0]).astype(int) + 1 sinknum = np.nanmax(scattering_index[:, 1]).astype(int) + 1 scatterdepth = int((len(total_network[0, :]) - 3) / 3) impulse_response = np.zeros((len(time_bins), sinknum), dtype="complex") angle_response = np.zeros((len(az_bins), sinknum), dtype="complex") aoa_spectrum = np.zeros((sinknum, az_bins.shape[0]), dtype=np.float32) delay_spectrum = np.zeros((sinknum, az_bins.shape[0]), dtype=np.float32) az_std = np.zeros((sinknum), dtype=np.float32) averaged_response = np.zeros((sinknum, 1), dtype="complex") phase_std = np.zeros((sinknum), dtype=np.float32) total_pointing_vectors = np.empty((0, 3), dtype=np.float32) paths = np.empty((0, 1), dtype=np.float32).ravel() for idx in range(scatterdepth): if idx == (scatterdepth - 1): # indexing if scatterdepth == 1: start_row = 0 end_row = len(total_network[:, 1]) - 1 else: start_row = ( np.max( np.where((np.isnan(total_network[:, 6 + ((idx - 1) * 3)]))) ) + 1 ) end_row = len(total_network[:, 1]) - 1 end_col = (idx * 3) + 6 # engine here paths = np.append( paths, PathLength(total_network[start_row : end_row + 1, :end_col]), axis=0, ) total_pointing_vectors = np.append( total_pointing_vectors, AngleofArrivalVectors( total_network[start_row : end_row + 1, :end_col] ), axis=0, ) elif idx == 0: # indexing start_row = 0 end_row = np.max(np.where((np.isnan(total_network[:, 6 + (idx * 3)])))) end_col = (idx * 3) + 6 # engine here paths = np.append( paths, PathLength(total_network[start_row : end_row + 1, :end_col]), axis=0, ) total_pointing_vectors = np.append( total_pointing_vectors, AngleofArrivalVectors( total_network[start_row : end_row + 1, :end_col] ), axis=0, ) else: # indexing start_row = ( np.max(np.where((np.isnan(total_network[:, 3 + (idx * 3)])))) + 1 ) end_row = np.max( np.where((np.isnan(total_network[:, 6 + ((idx) * 3)]))) ) end_col = (idx * 3) + 6 # engine here paths = np.append( paths, PathLength(total_network[start_row : end_row + 1, :end_col]), axis=0, ) total_pointing_vectors = np.append( total_pointing_vectors, AngleofArrivalVectors( total_network[start_row : end_row + 1, :end_col] ), axis=0, ) # # scatter_map indexing path_loss = wavelength / (4 * np.pi * paths) ray_components = path_loss * (np.exp(paths * wave_vector * 1j)) phase_components = paths * wave_vector angular_dist = Angular_distance(ideal_vector, total_pointing_vectors) travel_times = paths / scipy.constants.c for sink_index in range(sinknum): sink_indexing = np.where((scattering_index[:, 1] == sink_index)) if impulse: impulse_response = ImpulseStack( impulse_response, ray_components, sink_index, sink_indexing, travel_times, time_bins, ) # impulse_bin_indexing=np.digitize(travel_times[sink_indexing],time_bins+travel_times[np.argmax(ray_components)],right=True) # for time_index in range(len(ray_components[sink_indexing])): # impulse_response[impulse_bin_indexing[time_index],sink_index]=impulse_response[impulse_bin_indexing[time_index],sink_index]+ray_components[sink_indexing][time_index] if aoa: angle_response = angleofarrivalStack( angle_response, ray_components, sink_index, sink_indexing, angular_dist, az_bins, ) # angle_bin_indexing=np.digitize(angular_dist[sink_indexing],az_bins,right=True) # for angle_index in range(len(ray_components[sink_indexing])): # angle_response[angle_bin_indexing[angle_index],sink_index]=angle_response[angle_bin_indexing[angle_index],sink_index]+ray_components[sink_indexing][angle_index] return angle_response, impulse_response, travel_times[np.argmax(ray_components)] @njit(parallel=True) def ImpulseStack( impulse_response, ray_components, sink_index, sink_indexing, travel_times, time_bins ): # attempt to impove the efficiency of the digitize action impulse_bin_indexing = np.digitize( travel_times[sink_indexing], time_bins + travel_times[np.argmax(ray_components)], right=True, ) for time_index in prange(len(ray_components[sink_indexing])): impulse_response[impulse_bin_indexing[time_index], sink_index] = ( impulse_response[impulse_bin_indexing[time_index], sink_index] + ray_components[sink_indexing][time_index] ) return impulse_response @njit(parallel=True) def angleofarrivalStack( angle_response, ray_components, sink_index, sink_indexing, angular_dist, az_bins ): # improved efficiency digitizatoin angle_bin_indexing = np.digitize(angular_dist[sink_indexing], az_bins, right=True) for angle_index in prange(len(ray_components[sink_indexing])): angle_response[angle_bin_indexing[angle_index], sink_index] = ( angle_response[angle_bin_indexing[angle_index], sink_index] + ray_components[sink_indexing][angle_index] ) return angle_response @njit(parallel=True) def Angular_distance(center_vector, pointing_vectors): if len(center_vector) == 1: angle_lists = np.zeros((len(pointing_vectors), 1), dtype=np.float32) for i in range(len(pointing_vectors)): angle_lists[i] = np.arccos(np.dot(center_vector, pointing_vectors[i, :])) # angle_lists[i]=np.arctan2(pointing_vectors[i,1],pointing_vectors[i,0]) else: angle_lists = np.zeros((len(pointing_vectors), 1), dtype=np.float32) for i in range(len(pointing_vectors)): angle_lists[i] = np.arccos( np.dot(center_vector[i, :], pointing_vectors[i, :]) ) # angle_lists[i]=np.arctan2(pointing_vectors[i,1],pointing_vectors[i,0]) return angle_lists # # # # @jit # def VectorNetworkProcessEM( # sourcenum, # sinknum, # unified_points, # unified_normals, # unified_weights, # point_information, # scattering_index, # scattering_coefficient, # wavelength, # ): # """ # A more efficienct version of BaseNetworkProcess, using the vectorised path length function to process the total_network array more efficienctly. # # # Parameters # ---------- # total_network : array of n*(m*3) tuples, the coordinates of each interaction point # wavelength : scaler # wavelength of interest, currently a single value (SI units) # scattering_index : n*2 array of integers, the source and sink index of each ray # # # Returns # ------- # scatter_map : source_size*sink_size array # # # """ # if scattering_index.shape[0] == 0: # scatter_map = np.zeros((sourcenum, sinknum, 3), dtype="complex") # else: # wave_vector = (2 * np.pi) / wavelength # scatterdepth = scattering_index.shape[1] - 1 # scatter_map = np.zeros((sourcenum, sinknum, 3, scatterdepth), dtype="complex") # for idx in range(scatterdepth): # if idx == (scatterdepth - 1): # ray_analysis_index = scattering_index[ # ~np.equal(scattering_index[:, idx + 1], 0), : # ] # ray_components = np.zeros( # (ray_analysis_index.shape[0], 3), dtype=np.complex64 # ) # ray_components = ScatteringNetworkGenerator( # ray_analysis_index, # unified_points, # unified_normals, # unified_weights, # point_information, # scattering_coefficient, # wavelength, # ray_components, # ) # # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) # depth_slice = ray_analysis_index[:, [0, -1]] # # elif idx == 0: # ray_analysis_index = scattering_index[ # np.equal(scattering_index[:, idx + 2], 0), 0 : idx + 2 # ] # ray_components = np.zeros( # (ray_analysis_index.shape[0], 3), dtype=np.complex64 # ) # ray_components = ScatteringNetworkGenerator( # ray_analysis_index, # unified_points, # unified_normals, # unified_weights, # point_information, # scattering_coefficient, # wavelength, # ray_components, # ) # # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) # depth_slice = ray_analysis_index # # else: # ray_analysis_index = scattering_index[ # ~np.equal(scattering_index[:, idx + 1], 0), 0 : idx + 2 # ] # ray_components = np.zeros( # (ray_analysis_index.shape[0], 3), dtype=np.complex64 # ) # ray_components = ScatteringNetworkGenerator( # ray_analysis_index, # unified_points, # unified_normals, # unified_weights, # point_information, # scattering_coefficient, # wavelength, # ray_components, # ) # # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) # depth_slice = ray_analysis_index[:, [0, -1]] # # # # scatter_map indexing # # for source_index in range(sourcenum): # # for sink_index in range(sinknum): # # scatter_map[source_index,sink_index,idx]=np.sum(ray_components[(depth_slice[:,0]-1==0)&(depth_slice[:,1]-sourcenum-1==0)]) # # scatter_map = scatter_net_sortEMtest( # sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx # ) # # return scatter_map # # # def VectorNetworkProcessTimeEM( # sourcenum, # sinknum, # unified_points, # unified_normals, # unified_weights, # point_information, # scattering_index, # wavelength, # ): # """ # A more efficienct version of BaseNetworkProcess, using the vectorised path length function to process the total_network array more efficienctly. # # # Parameters # ---------- # total_network : array of n*(m*3) tuples, the coordinates of each interaction point # wavelength : scaler # wavelength of interest, currently a single value (SI units) # scattering_index : n*2 array of integers, the source and sink index of each ray # # # Returns # ------- # scatter_map : source_size*sink_size array # # # """ # if scattering_index.shape[0] == 0: # scatter_map = np.zeros((sourcenum, sinknum, 3), dtype="complex") # else: # time_index = np.linspace(1e-8, 2e-6, 100000) # wave_vector = (2 * np.pi) / wavelength # scatterdepth = scattering_index.shape[1] - 1 # scatter_map = np.zeros( # (sourcenum, sinknum, 3, len(time_index), scatterdepth), dtype="complex" # ) # for idx in range(scatterdepth): # if idx == (scatterdepth - 1): # ray_analysis_index = scattering_index[ # ~np.equal(scattering_index[:, idx + 1], 0), : # ] # ray_components = np.zeros( # (ray_analysis_index.shape[0], 3), dtype=np.complex64 # ) # time_components = np.zeros( # (ray_analysis_index.shape[0]), dtype=np.float32 # ) # ray_components, time_components = ScatteringNetworkGeneratorTime( # ray_analysis_index, # unified_points, # unified_normals, # unified_weights, # point_information, # wavelength, # ray_components, # time_components, # ) # # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) # depth_slice = ray_analysis_index[:, [0, -1]] # # elif idx == 0: # ray_analysis_index = scattering_index[ # np.equal(scattering_index[:, idx + 2], 0), 0 : idx + 2 # ] # ray_components = np.zeros( # (ray_analysis_index.shape[0], 3), dtype=np.complex64 # ) # time_components = np.zeros( # (ray_analysis_index.shape[0]), dtype=np.float32 # ) # ray_components, time_components = ScatteringNetworkGeneratorTime( # ray_analysis_index, # unified_points, # unified_normals, # unified_weights, # point_information, # wavelength, # ray_components, # time_components, # ) # # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) # depth_slice = ray_analysis_index # # else: # ray_analysis_index = scattering_index[ # ~np.equal(scattering_index[:, idx + 1], 0), 0 : idx + 2 # ] # ray_components = np.zeros( # (ray_analysis_index.shape[0], 3), dtype=np.complex64 # ) # time_components = np.zeros( # (ray_analysis_index.shape[0]), dtype=np.float32 # ) # ray_components, time_components = ScatteringNetworkGeneratorTime( # ray_analysis_index, # unified_points, # unified_normals, # unified_weights, # point_information, # wavelength, # ray_components, # time_components, # ) # # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) # depth_slice = ray_analysis_index[:, [0, -1]] # # # # scatter_map indexing # # for source_index in range(sourcenum): # # for sink_index in range(sinknum): # # scatter_map[source_index,sink_index,idx]=np.sum(ray_components[(depth_slice[:,0]-1==0)&(depth_slice[:,1]-sourcenum-1==0)]) # # scatter_map = scatter_net_sortTimeEM( # sourcenum, # sinknum, # scatter_map, # depth_slice, # ray_components, # time_components, # time_index, # idx, # ) # # return scatter_map # # # def VectorNetworkProcessEMv2( # sourcenum, # sinknum, # unified_points, # unified_normals, # unified_weights, # point_information, # scattering_index, # wavelength, # ): # """ # A more efficienct version of BaseNetworkProcess, using the vectorised path length function to process the total_network array more efficienctly. # # # Parameters # ---------- # total_network : array of n*(m*3) tuples, the coordinates of each interaction point # wavelength : scaler # wavelength of interest, currently a single value (SI units) # scattering_index : n*2 array of integers, the source and sink index of each ray # # # Returns # ------- # scatter_map : source_size*sink_size array # # # """ # if scattering_index.shape[0] == 0: # scatter_map = np.zeros((sourcenum, sinknum, 3), dtype="complex") # else: # wave_vector = (2 * np.pi) / wavelength # scatterdepth = scattering_index.shape[1] - 1 # scatter_map = np.zeros((sourcenum, sinknum, 3, scatterdepth), dtype="complex") # ray_components = np.zeros((scattering_index.shape[0], 3), dtype=np.complex64) # ray_components = EM.EMGPUWrapper( # scattering_index, point_information, wavelength # ) # for idx in range(scatterdepth): # if idx == (scatterdepth - 1): # ray_analysis_index = scattering_index[ # ~np.equal(scattering_index[:, idx + 1], 0), : # ] # temp_components = ray_components[ # ~np.equal(scattering_index[:, idx + 1], 0), : # ] # # ray_components=ScatteringNetworkGenerator(ray_analysis_index,unified_points,unified_normals,unified_weights,point_information,wavelength,ray_components) # # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) # depth_slice = ray_analysis_index[:, [0, -1]] # # elif idx == 0: # ray_analysis_index = scattering_index[ # np.equal(scattering_index[:, idx + 2], 0), 0 : idx + 2 # ] # temp_components = ray_components[ # ~np.equal(scattering_index[:, idx + 2], 0), : # ] # # ray_components=np.zeros((ray_analysis_index.shape[0],3),dtype=np.complex64) # # ray_components=EM.EMGPUWrapper(ray_analysis_index,point_information,wavelength) # # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) # depth_slice = ray_analysis_index # # else: # ray_analysis_index = scattering_index[ # ~np.equal(scattering_index[:, idx + 1], 0), 0 : idx + 2 # ] # temp_components = ray_components[ # ~np.equal(scattering_index[:, idx + 1], 0), : # ] # # ray_components=EM.EMGPUWrapper(ray_analysis_index,point_information,wavelength) # # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) # depth_slice = ray_analysis_index[:, [0, -1]] # # # # scatter_map indexing # # for source_index in range(sourcenum): # # for sink_index in range(sinknum): # # scatter_map[source_index,sink_index,idx]=np.sum(ray_components[(depth_slice[:,0]-1==0)&(depth_slice[:,1]-sourcenum-1==0)]) # # scatter_map = scatter_net_sortEM( # sourcenum, sinknum, scatter_map, depth_slice, temp_components, idx # ) # # return scatter_map # # # # @jit # def VectorNetworkProcessv2( # sources, sinks, scattering_points, scattering_index, wavelength # ): # """ # A more efficienct version of BaseNetworkProcess, using the vectorised path length function to process the total_network array more efficienctly. # # # Parameters # ---------- # total_network : array of n*(m*3) tuples, the coordinates of each interaction point # wavelength : scaler # wavelength of interest, currently a single value (SI units) # scattering_index : n*2 array of integers, the source and sink index of each ray # # # Returns # ------- # scatter_map : source_size*sink_size array # # # """ # if scattering_index.shape[0] == 0: # sourcenum = np.nanmax(scattering_index[:, 0]).astype(int) + 1 # sinknum = np.nanmax(scattering_index[:, 1]).astype(int) + 1 # scatter_map = np.zeros((sourcenum, sinknum, 1), dtype="complex") # else: # unified_model = np.append( # np.append(sources.astype(np.float32), sinks.astype(np.float32), axis=0), # scattering_points.astype(np.float32), # axis=0, # ) # wave_vector = (2 * np.pi) / wavelength # sourcenum = sources.shape[0] # sinknum = sinks.shape[0] # scatterdepth = scattering_index.shape[1] - 1 # scatter_map = np.zeros((sourcenum, sinknum, scatterdepth), dtype="complex") # for idx in range(scatterdepth): # if idx == (scatterdepth - 1): # ray_analysis_index = scattering_index[ # ~np.equal(scattering_index[:, idx + 1], 0), : # ] # if idx == 0: # paths = PathLength( # np.append( # unified_model[ray_analysis_index[:, 0], :] - 1, # unified_model[ray_analysis_index[:, 1] - 1, :], # axis=1, # ) # ) # elif idx == 1: # paths = PathLength( # np.append( # np.append( # unified_model[ray_analysis_index[:, 0] - 1, :], # unified_model[ray_analysis_index[:, 1] - 1, :], # axis=1, # ), # unified_model[ray_analysis_index[:, 2] - 1, :], # axis=1, # ) # ) # elif idx == 2: # paths = PathLength( # np.append( # np.append( # np.append( # unified_model[ray_analysis_index[:, 0] - 1, :], # unified_model[ray_analysis_index[:, 1] - 1, :], # axis=1, # ), # unified_model[ray_analysis_index[:, 2] - 1, :], # axis=1, # ), # unified_model[ray_analysis_index[:, 3] - 1, :], # axis=1, # ) # ) # # # path_loss=((wavelength/(4*np.pi*paths))**2)*np.exp(paths*wave_vector*1j) # path_loss = wavelength / (4 * np.pi * paths) # ray_components = path_loss * (np.exp(paths * wave_vector * 1j)) # # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) # depth_slice = ray_analysis_index[:, [0, -1]] # # elif idx == 0: # ray_analysis_index = scattering_index[ # np.equal(scattering_index[:, idx + 2], 0), 0 : idx + 2 # ] # paths = PathLength( # np.append( # unified_model[ray_analysis_index[:, 0] - 1, :], # unified_model[ray_analysis_index[:, 1] - 1, :], # axis=1, # ) # ) # # path_loss=((wavelength/(4*np.pi*paths))**2)*np.exp(paths*wave_vector*1j) # path_loss = wavelength / (4 * np.pi * paths) # ray_components = path_loss * (np.exp(paths * wave_vector * 1j)) # # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) # depth_slice = ray_analysis_index # # else: # ray_analysis_index = scattering_index[ # ~np.equal(scattering_index[:, idx + 1], 0), : # ] # if idx == 0: # paths = PathLength( # np.append( # unified_model[ray_analysis_index[:, 0], :] - 1, # unified_model[ray_analysis_index[:, 1] - 1, :], # axis=1, # ) # ) # elif idx == 1: # paths = PathLength( # np.append( # np.append( # unified_model[ray_analysis_index[:, 0] - 1, :], # unified_model[ray_analysis_index[:, 1] - 1, :], # axis=1, # ), # unified_model[ray_analysis_index[:, 2] - 1, :], # axis=1, # ) # ) # elif idx == 2: # paths = PathLength( # np.append( # np.append( # np.append( # unified_model[ray_analysis_index[:, 0] - 1, :], # unified_model[ray_analysis_index[:, 1] - 1, :], # axis=1, # ), # unified_model[ray_analysis_index[:, 2] - 1, :], # axis=1, # ), # unified_model[ray_analysis_index[:, 3] - 1, :], # axis=1, # ) # ) # # # path_loss=((wavelength/(4*np.pi*paths))**2)*np.exp(paths*wave_vector*1j) # path_loss = wavelength / (4 * np.pi * paths) # ray_components = path_loss * (np.exp(paths * wave_vector * 1j)) # # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) # depth_slice = ray_analysis_index[:, [0, -1]] # # # # scatter_map indexing # # for source_index in range(sourcenum): # # for sink_index in range(sinknum): # # scatter_map[source_index,sink_index,idx]=np.sum(ray_components[(depth_slice[:,0]-1==0)&(depth_slice[:,1]-sourcenum-1==0)]) # # scatter_map = scatter_net_sorttest( # sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx # ) # # return scatter_map @njit(parallel=True) def scatter_net_sort(sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx): # simplified scattering sort to create a scattering network using Numbas prange and loop unrolling. for sink_index in range(sinknum): for source_index in range(sourcenum): scatter_map[source_index, sink_index, idx] = np.nansum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index) ] ) return scatter_map def scatter_net_sorttest( sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx ): # simplified scattering sort to create a scattering network using Numbas prange and loop unrolling. for sink_index in range(sinknum): for source_index in range(sourcenum): scatter_map[source_index, sink_index, idx] = np.nansum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index) ] ) return scatter_map def scatter_net_sortEMtest( sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx ): # simplified scattering sort to create a scattering network using Numbas prange and loop unrolling. for sink_index in range(sinknum): for source_index in range(sourcenum): scatter_map[source_index, sink_index, 0, idx] = np.nansum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 0, ] ) scatter_map[source_index, sink_index, 1, idx] = np.nansum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 1, ] ) scatter_map[source_index, sink_index, 2, idx] = np.nansum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 2, ] ) return scatter_map @njit(parallel=True) def scatter_net_sortEM( sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx ): # simplified scattering sort to create a scattering network using Numbas prange and loop unrolling. for sink_index in range(sinknum): for source_index in range(sourcenum): scatter_map[source_index, sink_index, 0, idx] = np.sum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 0, ] ) scatter_map[source_index, sink_index, 1, idx] = np.sum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 1, ] ) scatter_map[source_index, sink_index, 2, idx] = np.sum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 2, ] ) return scatter_map # @njit(parallel=True) def scatter_net_sortTimeEM( sourcenum, sinknum, scatter_map, depth_slice, ray_components, time_components, time_index, idx, ): # simplified scattering sort to create a scattering network using Numbas prange and loop unrolling. for sink_index in range(sinknum): for source_index in range(sourcenum): time_address = np.digitize( time_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index) ], time_index, ) scatter_map[source_index, sink_index, 0, time_address, idx] = np.sum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 0, ] ) scatter_map[source_index, sink_index, 1, time_address, idx] = np.sum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 1, ] ) scatter_map[source_index, sink_index, 2, time_address, idx] = np.sum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 2, ] ) return scatter_map def BaseNetworkProcess(total_network, wavelength, scattering_index): # take the total_network matrix, and work out the path length of each connection, and hence superposition wave_vector = (2 * np.pi) / wavelength sourcenum = np.nanmax(scattering_index[:, 0]).astype(int) + 1 sinknum = np.nanmax(scattering_index[:, 1]).astype(int) + 1 scatterdepth = int((len(total_network[1, :]) - 3) / 3) scatter_map = np.zeros((sourcenum, sinknum, scatterdepth), dtype="complex") depth_num = 0 for idx in range(scatterdepth): if idx == (scatterdepth - 1): if scatterdepth == 1: start_row = 0 end_row = len(total_network[:, 1]) - 1 else: start_row = ( np.max(np.where((np.isnan(total_network[:, 6 + ((idx - 1) * 3)])))) + 1 ) end_row = len(total_network[:, 1]) - 1 elif idx == 0: start_row = 0 end_row = np.max(np.where((np.isnan(total_network[:, 6 + (idx * 3)])))) else: start_row = ( np.max(np.where((np.isnan(total_network[:, 3 + (idx * 3)])))) + 1 ) end_row = np.max(np.where((np.isnan(total_network[:, 6 + ((idx) * 3)])))) for idr in range(start_row, end_row + 1): source_index = scattering_index[idr, 0].astype(int) sink_index = scattering_index[idr, 1].astype(int) path_distance = np.zeros((1)) if idx == 0: path_distance = ( distance.euclidean( total_network[idr, np.asarray([0, 1, 2]) + ((0) * 3)], total_network[idr, np.asarray([3, 4, 5]) + ((0) * 3)], ) + path_distance ) else: for idy in range(idx + 1): path_distance = ( distance.euclidean( total_network[idr, np.asarray([0, 1, 2]) + ((idy) * 3)], total_network[idr, np.asarray([3, 4, 5]) + ((idy) * 3)], ) + path_distance ) path_loss = (4 * np.pi * path_distance) / wavelength scatter_map[source_index, sink_index, idx] = (1 / path_loss) * np.exp( -path_distance * wave_vector * 1j ) + scatter_map[source_index, sink_index, idx] return scatter_map def charge_rays_environment1Dv2(sources, sinks, environment_points, point_indexing): """ Generate Ray Payload from numpy arrays of sources, sinks, and environment points, in a 1 dimensional array to match the point_indexing demands Parameters ---------- sources : TYPE DESCRIPTION. sinks : TYPE DESCRIPTION. environment_points : numpy array of xyz points for scattering in environment point_indexing : (n*m) by 2 array, 0 has the source index, and 1 has the sink index, otherwise nans' Returns ------- temp_ray_payload : array of ray_t type ray payload to be sent to GPU """ # print("sources shape", sources.shape) # print("sinks shape", sinks.shape) # print("environment_points shape", environment_points.shape) unified_model = np.append( np.append(sources, sinks, axis=0), environment_points, axis=0 ) temp_ray_payload = np.empty(point_indexing.shape[0], dtype=base_types.ray_t) local_sources = unified_model[point_indexing[:, 0] - 1, :] directions = np.zeros( (len(unified_model[point_indexing[:, 1] - 1, :]), 3), dtype=np.float32 ) norm_length = np.zeros( (len(unified_model[point_indexing[:, 1] - 1, :]), 1), dtype=np.float32 ) directions, norm_length = math_functions.calc_dv_norm( unified_model[point_indexing[:, -2] - 1, :], unified_model[point_indexing[:, -1] - 1, :], directions, norm_length, ) temp_ray_payload[:]["ox"] = unified_model[point_indexing[:, -2] - 1, 0] temp_ray_payload[:]["oy"] = unified_model[point_indexing[:, -2] - 1, 1] temp_ray_payload[:]["oz"] = unified_model[point_indexing[:, -2] - 1, 2] temp_ray_payload[:]["dx"] = directions[:, 0] temp_ray_payload[:]["dy"] = directions[:, 1] temp_ray_payload[:]["dz"] = directions[:, 2] # temp_ray_payload[:]['tx']=unified_model[point_indexing[:,1]-1,0] # temp_ray_payload[:]['ty']=unified_model[point_indexing[:,1]-1,1] # temp_ray_payload[:]['tz']=unified_model[point_indexing[:,1]-1,2] temp_ray_payload[:]["dist"] = norm_length[:, 0] temp_ray_payload[:]["intersect"] = False return temp_ray_payload def rayHits1Dv2(ray_payload, point_indexing, sink_index): """ filtering process for each raycasting stage, seprating rays which hit sinks from non-terminating rays. Parameters ---------- ray_payload : TYPE DESCRIPTION. point_indexing : TYPE DESCRIPTION. scatter_inc : TYPE, optional DESCRIPTION. The default is 1. Returns ------- filtered_network : TYPE DESCRIPTION. final_network : TYPE DESCRIPTION. filtered_index : TYPE DESCRIPTION. final_index : TYPE DESCRIPTION. """ hitmap = ~ray_payload[:]["intersect"] final_index = point_indexing[ np.all(np.array([hitmap, np.isin(point_indexing[:, -1], sink_index)]), axis=0), :, ] filtered_index = point_indexing[ np.all(np.array([hitmap, ~np.isin(point_indexing[:, -1], sink_index)]), axis=0), :, ] return filtered_index, final_index def launchRaycaster1Dv2( sources, sinks, scattering_points, io_indexing, environment_local ): # cuda.select_device(0) start = timer() first_ray_payload = charge_rays_environment1Dv2( sources, sinks, scattering_points, io_indexing ) prep_dt = timer() - start raystart = timer() ray_num = len(first_ray_payload) tri_num = len(environment_local) max_tris = 2**20 triangle_chunk = np.linspace( 0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32 ) chunk_size = 2**10 threads_in_block = 1024 ray_chunks = np.linspace( 0, ray_num, math.ceil(ray_num / chunk_size) + 1, dtype=np.int32 ) # for idx in range(len(triangle_chunk)-1): d_environment = cuda.device_array( len(environment_local), dtype=base_types.triangle_t ) cuda.to_device(environment_local, to=d_environment) for n in range(len(ray_chunks) - 1): chunk_payload = first_ray_payload[ray_chunks[n] : ray_chunks[n + 1]] chunk_ray_size = len(chunk_payload) d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=base_types.ray_t) cuda.to_device(chunk_payload, to=d_chunk_payload) # # ray_temp=np.empty((chunk_ray_size),dtype=np.bool) # ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect'] # distance_temp=np.empty((chunk_ray_size),dtype=np.float32) # distance_temp[:]=math.inf # dist_list=cuda.to_device(distance_temp) # d_ray_flag=cuda.to_device(ray_temp) # Here, we choose the granularity of the threading on our device. We want # to try to cover the entire workload of rays and targets with simulatenous threads, so we'll # choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads grids = math.ceil(chunk_ray_size / threads_in_block) threads = min(ray_chunks[1] - ray_chunks[0], threads_in_block) # Execute the kernel # cuda.profile_start() # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag) kernel1Dv2[grids, threads](d_chunk_payload, d_environment) # cuda.profile_stop() # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host() first_ray_payload[ray_chunks[n] : ray_chunks[n + 1]] = ( d_chunk_payload.copy_to_host() ) # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host() # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host() # cuda.close() kernel_dt = timer() - raystart start = timer() RAYS_CAST = ray_num sink_index = np.arange( sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0] ).reshape(sinks.shape[0], 1) filtered_index, final_index = rayHits1Dv2( first_ray_payload, io_indexing, sink_index ) mem_dt = timer() - start # print("First Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(prep_dt,kernel_dt,mem_dt) ) return filtered_index, final_index, RAYS_CAST def launchRaycaster1Dv3( sources, sinks, scattering_points, io_indexing, environment_local ): # cuda.select_device(0) start = timer() first_ray_payload = charge_rays_environment1Dv2( sources, sinks, scattering_points, io_indexing ) prep_dt = timer() - start raystart = timer() ray_num = len(first_ray_payload) tri_num = len(environment_local) # print('Launch Raycaster Triangles ', len(environment_local)) max_tris = 2**20 triangle_chunk = np.linspace( 0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32 ) chunk_size = 2**18 threads_in_block = 256 # ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32) # for idx in range(len(triangle_chunk)-1): d_environment = cuda.device_array( len(environment_local), dtype=base_types.triangle_t ) cuda.to_device(environment_local, to=d_environment) d_chunk_payload = cuda.device_array([ray_num], dtype=base_types.ray_t) cuda.to_device(first_ray_payload, to=d_chunk_payload) # # ray_temp=np.empty((chunk_ray_size),dtype=np.bool) # ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect'] # distance_temp=np.empty((chunk_ray_size),dtype=np.float32) # distance_temp[:]=math.inf # dist_list=cuda.to_device(distance_temp) # d_ray_flag=cuda.to_device(ray_temp) # Here, we choose the granularity of the threading on our device. We want # to try to cover the entire workload of rays and targets with simulatenous threads, so we'll # choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads grids = math.ceil(ray_num / threads_in_block) threads = threads_in_block # Execute the kernel # cuda.profile_start() # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag) kernel1Dv3[grids, threads](d_chunk_payload, d_environment) # cuda.profile_stop() # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host() first_ray_payload = d_chunk_payload.copy_to_host() # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host() # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host() # cuda.close() kernel_dt = timer() - raystart start = timer() RAYS_CAST = ray_num sink_index = np.arange( sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0] ).reshape(sinks.shape[0], 1) filtered_index, final_index = rayHits1Dv2( first_ray_payload, io_indexing, sink_index ) mem_dt = timer() - start # deallocate memory on gpu # ctx = cuda.current_context() # deallocs = ctx.deallocations # deallocs.clear() # print("First Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(prep_dt,kernel_dt,mem_dt) ) return filtered_index, final_index, RAYS_CAST def chunkingRaycaster1Dv2( sources, sinks, scattering_points, filtered_index, environment_local, terminate_flag ): # cuda.select_device(0) start = timer() sink_index = np.arange( sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0] ).reshape(sinks.shape[0], 1) scattering_point_index = np.arange( np.max(sink_index) + 1, np.max(sink_index) + 1 + scattering_points.shape[0] ).reshape(scattering_points.shape[0], 1) if not terminate_flag: target_indexing = create_model_index( filtered_index, sink_index, scattering_point_index ) else: target_indexing = create_model_index( filtered_index, sink_index, np.empty((0, 0), dtype=np.int32) ) # only target rays at sinks second_ray_payload = charge_rays_environment1Dv2( sources, sinks, scattering_points, target_indexing ) prep_dt = timer() - start raystart = timer() ray_num = len(second_ray_payload) tri_num = len(environment_local) max_tris = 2**18 triangle_chunk = np.linspace( 0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32 ) # Create a container for the pixel RGBA information of our image chunk_size = 2**10 threads_in_block = 1024 ray_chunks = np.linspace( 0, ray_num, math.ceil(ray_num / chunk_size) + 1, dtype=np.int32 ) # for idx in range(len(triangle_chunk)-1): d_environment = cuda.device_array( len(environment_local), dtype=base_types.triangle_t ) cuda.to_device(environment_local, to=d_environment) for n in range(len(ray_chunks) - 1): chunk_payload = second_ray_payload[ray_chunks[n] : ray_chunks[n + 1]] chunk_ray_size = len(chunk_payload) # distance_temp=np.empty((chunk_ray_size),dtype=np.float32) # distance_temp[:]=math.inf # ray_temp=np.empty((chunk_ray_size),dtype=np.bool) # ray_temp[:]=False # dist_list=cuda.to_device(distance_temp) # d_ray_flag=cuda.to_device(ray_temp) # d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=ray_t) d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=base_types.ray_t) cuda.to_device(chunk_payload, to=d_chunk_payload) # Here, we choose the granularity of the threading on our device. We want # to try to cover the entire workload of rays and targets with simulatenous threads, so we'll # choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads grids = math.ceil(chunk_ray_size / threads_in_block) threads = min(ray_chunks[1] - ray_chunks[0], threads_in_block) # Execute the kernel # cuda.profile_start() kernel1Dv2[grids, threads](d_chunk_payload, d_environment) # cuda.profile_stop() # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host() # second_ray_payload[ray_chunks[n]:ray_chunks[n+1]]=d_chunk_payload.copy_to_host() # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]=d_chunk_payload.copy_to_host() second_ray_payload[ray_chunks[n] : ray_chunks[n + 1]] = ( d_chunk_payload.copy_to_host() ) # cuda.close() kernel_dt = timer() - raystart start = timer() RAYS_CAST = ray_num filtered_index2, final_index2 = rayHits1Dv2( second_ray_payload, target_indexing, sink_index ) mem_dt = timer() - start # print("Second Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(prep_dt,kernel_dt,mem_dt) ) return filtered_index2, final_index2, RAYS_CAST def chunkingRaycaster1Dv3( sources, sinks, scattering_points, filtered_index, environment_local, terminate_flag ): start = timer() cuda.current_context().memory_manager.deallocations.clear() free_mem, total_mem = cuda.current_context().get_memory_info() max_mem = np.ceil(free_mem * 0.5).astype(np.int64) ray_limit = np.floor( ((max_mem - environment_local.nbytes) / base_types.ray_t.size) ).astype(np.int64) sink_index = np.arange( sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0] ).reshape(sinks.shape[0], 1) scattering_point_index = np.arange( np.max(sink_index) + 1, np.max(sink_index) + 1 + scattering_points.shape[0] ).reshape(scattering_points.shape[0], 1) prep_dt = timer() - start raystart = timer() RAYS_CAST = 0 if not terminate_flag: target_indexing = create_model_index( filtered_index, sink_index, scattering_point_index ) second_ray_payload = charge_rays_environment1Dv2( sources, sinks, scattering_points, target_indexing ) # print('Chunking Raycaster Triangles ', len(environment_local)) # max_tris=2**18 # triangle_chunk=np.linspace(0,tri_num,math.ceil(tri_num/max_tris)+1,dtype=np.int32) # chunk_size=2**11 threads_in_block = 256 # ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32) # for idx in range(len(triangle_chunk)-1): d_environment = cuda.device_array( len(environment_local), dtype=base_types.triangle_t ) cuda.to_device(environment_local, to=d_environment) d_chunk_payload = cuda.device_array( [second_ray_payload.shape[0]], dtype=base_types.ray_t ) cuda.to_device(second_ray_payload, to=d_chunk_payload) # # ray_temp=np.empty((chunk_ray_size),dtype=np.bool) # ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect'] # distance_temp=np.empty((chunk_ray_size),dtype=np.float32) # distance_temp[:]=math.inf # dist_list=cuda.to_device(distance_temp) # d_ray_flag=cuda.to_device(ray_temp) # Here, we choose the granularity of the threading on our device. We want # to try to cover the entire workload of rays and targets with simulatenous threads, so we'll # choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads grids = math.ceil(second_ray_payload.shape[0] / threads_in_block) threads = threads_in_block # Execute the kernel # cuda.profile_start() # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag) kernel1Dv3[grids, threads](d_chunk_payload, d_environment) # cuda.profile_stop() # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host() second_ray_payload = d_chunk_payload.copy_to_host() # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host() # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host() # cuda.close() kernel_dt = timer() - raystart start = timer() RAYS_CAST += second_ray_payload.shape[0] filtered_index2, final_index2 = rayHits1Dv2( second_ray_payload, target_indexing, sink_index ) mem_dt = timer() - start else: if (filtered_index.shape[0] * sink_index.shape[0]) < ray_limit: target_indexing = create_model_index( filtered_index, sink_index, np.empty((0, 0), dtype=np.int32) ) # only target rays at sinks second_ray_payload = charge_rays_environment1Dv2( sources, sinks, scattering_points, target_indexing ) # print('Chunking Raycaster Triangles ', len(environment_local)) # max_tris=2**18 # triangle_chunk=np.linspace(0,tri_num,math.ceil(tri_num/max_tris)+1,dtype=np.int32) # chunk_size=2**11 threads_in_block = 256 # ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32) # for idx in range(len(triangle_chunk)-1): d_environment = cuda.device_array( len(environment_local), dtype=base_types.triangle_t ) cuda.to_device(environment_local, to=d_environment) d_chunk_payload = cuda.device_array( [second_ray_payload.shape[0]], dtype=base_types.ray_t ) cuda.to_device(second_ray_payload, to=d_chunk_payload) # # ray_temp=np.empty((chunk_ray_size),dtype=np.bool) # ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect'] # distance_temp=np.empty((chunk_ray_size),dtype=np.float32) # distance_temp[:]=math.inf # dist_list=cuda.to_device(distance_temp) # d_ray_flag=cuda.to_device(ray_temp) # Here, we choose the granularity of the threading on our device. We want # to try to cover the entire workload of rays and targets with simulatenous threads, so we'll # choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads grids = math.ceil(second_ray_payload.shape[0] / threads_in_block) threads = threads_in_block # Execute the kernel # cuda.profile_start() # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag) kernel1Dv3[grids, threads](d_chunk_payload, d_environment) # cuda.profile_stop() # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host() second_ray_payload = d_chunk_payload.copy_to_host() # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host() # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host() # cuda.close() kernel_dt = timer() - raystart start = timer() RAYS_CAST += second_ray_payload.shape[0] filtered_index2, final_index2 = rayHits1Dv2( second_ray_payload, target_indexing, sink_index ) mem_dt = timer() - start else: # the rays must fit in GPU memory, aim for no more than 80% utilisation # establish memory limits sub_filtered_index = np.array_split( filtered_index, np.ceil( filtered_index.shape[0] * sink_index.shape[0] / ray_limit ).astype(int), ) chunknum = len(sub_filtered_index) filtered_index2 = np.empty((0, filtered_index.shape[1] + 1), dtype=np.int32) final_index2 = np.empty((0, filtered_index.shape[1] + 1), dtype=np.int32) # print('chunking total of ',target_indexing.shape[0],' rays in ',chunknum,' batches') for chunkindex in range(chunknum): sub_target = create_model_index( sub_filtered_index[chunkindex], sink_index, np.empty((0, 0), dtype=np.int32), ) # cycle the raycaster over the sub arrays threads_in_block = 256 second_ray_payload = charge_rays_environment1Dv2( sources, sinks, scattering_points, sub_target ) # ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32) # for idx in range(len(triangle_chunk)-1): d_environment = cuda.device_array( len(environment_local), dtype=base_types.triangle_t ) cuda.to_device(environment_local, to=d_environment) d_chunk_payload = cuda.device_array( [second_ray_payload.shape[0]], dtype=base_types.ray_t ) cuda.to_device(second_ray_payload, to=d_chunk_payload) # # ray_temp=np.empty((chunk_ray_size),dtype=np.bool) # ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect'] # distance_temp=np.empty((chunk_ray_size),dtype=np.float32) # distance_temp[:]=math.inf # dist_list=cuda.to_device(distance_temp) # d_ray_flag=cuda.to_device(ray_temp) # Here, we choose the granularity of the threading on our device. We want # to try to cover the entire workload of rays and targets with simulatenous threads, so we'll # choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads grids = math.ceil(second_ray_payload.shape[0] / threads_in_block) threads = threads_in_block # Execute the kernel # cuda.profile_start() # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag) kernel1Dv3[grids, threads](d_chunk_payload, d_environment) # cuda.profile_stop() # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host() second_ray_payload = d_chunk_payload.copy_to_host() # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host() # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host() # cuda.close() kernel_dt = timer() - raystart start = timer() RAYS_CAST += second_ray_payload.shape[0] temp_filtered_index2, temp_final_index2 = rayHits1Dv2( second_ray_payload, sub_target, sink_index ) mem_dt = timer() - start filtered_index2 = np.append( filtered_index2, temp_filtered_index2, axis=0 ) final_index2 = np.append(final_index2, temp_final_index2, axis=0) # deallocate memory on gpu # ctx = cuda.current_context() # ctx.reset() # if target_indexing.shape[0] >= ray_limit: # # need to split the array and process seperatly # sub_target = np.array_split( # target_indexing, np.ceil(target_indexing.shape[0] / ray_limit).astype(int) # ) # chunknum = len(sub_target) # filtered_index2 = np.empty((0, target_indexing.shape[1]), dtype=np.int32) # final_index2 = np.empty((0, target_indexing.shape[1]), dtype=np.int32) # # print('chunking total of ',target_indexing.shape[0],' rays in ',chunknum,' batches') # for chunkindex in range(chunknum): # # cycle the raycaster over the sub arrays # threads_in_block = 256 # second_ray_payload = charge_rays_environment1Dv2( # sources, sinks, scattering_points, sub_target[chunkindex] # ) # # ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32) # # for idx in range(len(triangle_chunk)-1): # d_environment = cuda.device_array( # len(environment_local), dtype=base_types.triangle_t # ) # cuda.to_device(environment_local, to=d_environment) # d_chunk_payload = cuda.device_array( # [second_ray_payload.shape[0]], dtype=base_types.ray_t # ) # cuda.to_device(second_ray_payload, to=d_chunk_payload) # # # # ray_temp=np.empty((chunk_ray_size),dtype=np.bool) # # ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect'] # # distance_temp=np.empty((chunk_ray_size),dtype=np.float32) # # distance_temp[:]=math.inf # # dist_list=cuda.to_device(distance_temp) # # d_ray_flag=cuda.to_device(ray_temp) # # Here, we choose the granularity of the threading on our device. We want # # to try to cover the entire workload of rays and targets with simulatenous threads, so we'll # # choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads # grids = math.ceil(second_ray_payload.shape[0] / threads_in_block) # threads = threads_in_block # # Execute the kernel # # cuda.profile_start() # # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag) # kernel1Dv3[grids, threads](d_chunk_payload, d_environment) # # cuda.profile_stop() # # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host() # second_ray_payload = d_chunk_payload.copy_to_host() # # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host() # # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host() # # cuda.close() # kernel_dt = timer() - raystart # start = timer() # RAYS_CAST += second_ray_payload.shape[0] # temp_filtered_index2, temp_final_index2 = rayHits1Dv2( # second_ray_payload, sub_target[chunkindex], sink_index # ) # mem_dt = timer() - start # filtered_index2 = np.append(filtered_index2, temp_filtered_index2, axis=0) # final_index2 = np.append(final_index2, temp_final_index2, axis=0) # # deallocate memory on gpu # # ctx = cuda.current_context() # # ctx.reset() # else: # deallocate memory on gpu # ctx = cuda.current_context() # deallocs = ctx.deallocations # deallocs.clear() # print("Second Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(prep_dt,kernel_dt,mem_dt) ) return filtered_index2, final_index2, RAYS_CAST def charge_shadow_rays1D(sources, sinks, point_indexing, target_indexing): """ Generate Ray Payload from numpy arrays of sources, sinks, and environment points, in a 1 dimensional array Parameters ---------- sources : TYPE DESCRIPTION. sinks : TYPE DESCRIPTION. environment_points : numpy array of xyz points for scattering in environment point_indexing : (n*m) by 2 array, 0 has the source index, and 1 has the sink index, otherwise nans' Returns ------- temp_ray_payload : array of ray_t type ray payload to be sent to GPU """ idx = 0 targets = sinks target_num = len(targets) source_num, origin_width = np.shape(sources) origins = np.zeros((source_num * target_num, origin_width), dtype=np.float32) source_sink_index = np.full((source_num * target_num, 2), np.nan, dtype=np.int32) temp_ray_payload = np.empty([source_num * target_num], dtype=base_types.ray_t) temp_ray_payload, origins, source_sink_index = ray_charge_core_final_vector( temp_ray_payload, origins, source_sink_index, sources, targets, point_indexing, target_indexing, ) return temp_ray_payload, origins, source_sink_index def ray_charge_core_final_vector( temp_ray_payload, origins, source_sink_index, sources, targets, point_indexing, target_indexing, ): # using array allocation # idx=0 target_num = len(targets) source_num, origin_width = np.shape(sources) directions = np.zeros((len(targets), 3), dtype=np.float32) local_sources = np.zeros((len(targets), origin_width), dtype=np.float32) norm_length = np.zeros((len(targets), 1), dtype=np.float32) source_chunking = np.linspace( 0, len(temp_ray_payload), math.ceil(len(temp_ray_payload) / target_num) + 1, dtype=np.int32, ) for n in range(len(source_chunking) - 1): local_sources[:, -origin_width:] = sources[n, :] directions, norm_length = math_functions.calc_dv_norm( local_sources[:, -3:], targets, directions, norm_length ) temp_ray_payload[source_chunking[n] : source_chunking[n + 1]]["ox"] = ( local_sources[:, -3] ) temp_ray_payload[source_chunking[n] : source_chunking[n + 1]]["oy"] = ( local_sources[:, -2] ) temp_ray_payload[source_chunking[n] : source_chunking[n + 1]]["oz"] = ( local_sources[:, -1] ) temp_ray_payload[source_chunking[n] : source_chunking[n + 1]]["dx"] = ( directions[:, 0] ) temp_ray_payload[source_chunking[n] : source_chunking[n + 1]]["dy"] = ( directions[:, 1] ) temp_ray_payload[source_chunking[n] : source_chunking[n + 1]]["dz"] = ( directions[:, 2] ) # temp_ray_payload[source_chunking[n]:source_chunking[n+1]]['tx']=targets[:,0] # temp_ray_payload[source_chunking[n]:source_chunking[n+1]]['ty']=targets[:,1] # temp_ray_payload[source_chunking[n]:source_chunking[n+1]]['tz']=targets[:,2] temp_ray_payload[source_chunking[n] : source_chunking[n + 1]]["dist"] = ( norm_length[:, 0] ) temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][ "intersect" ] = False origins[source_chunking[n] : source_chunking[n + 1], -origin_width:] = sources[ n, : ] source_sink_index[source_chunking[n] : source_chunking[n + 1], 0] = ( point_indexing[n, 0] ) source_sink_index[source_chunking[n] : source_chunking[n + 1], 1] = ( target_indexing.ravel() ) return temp_ray_payload, origins, source_sink_index def shadowRaycaster(filtered_network, sinks, filtered_index, environment_local): start = timer() target_indexing = np.full((len(sinks), 1), np.nan, dtype=np.float32).ravel() target_indexing[:] = range(len(sinks)) second_ray_payload, second_origin, io_index2 = charge_shadow_rays1D( filtered_network, sinks, filtered_index, target_indexing ) prep_dt = timer() - start raystart = timer() ray_num = len(second_ray_payload) tri_num = len(environment_local) max_tris = 2**16 triangle_chunk = np.linspace( 0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32 ) # Create a container for the pixel RGBA information of our image chunk_size = 2**21 threads_in_block = 1024 ray_chunks = np.linspace( 0, ray_num, math.ceil(ray_num / chunk_size) + 1, dtype=np.int32 ) # sync_stream=cuda.stream() for idx in range(len(triangle_chunk) - 1): d_environment = cuda.device_array( len(environment_local[triangle_chunk[idx] : triangle_chunk[idx + 1]]), dtype=base_types.triangle_t, ) cuda.to_device( environment_local[triangle_chunk[idx] : triangle_chunk[idx + 1]], to=d_environment, ) for n in range(len(ray_chunks) - 1): chunk_payload = second_ray_payload[ray_chunks[n] : ray_chunks[n + 1]] chunk_ray_size = len(chunk_payload) distance_temp = np.empty((chunk_ray_size), dtype=np.float32) distance_temp[:] = math.inf ray_temp = np.empty((chunk_ray_size), dtype=bool) ray_temp[:] = False dist_list = cuda.to_device(distance_temp) d_ray_flag = cuda.to_device(ray_temp) # d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=ray_t) d_chunk_payload = cuda.device_array( [chunk_ray_size], dtype=base_types.ray_t ) cuda.to_device(chunk_payload, to=d_chunk_payload) # Here, we choose the granularity of the threading on our device. We want # to try to cover the entire workload of rays and targets with simulatenous threads, so we'll # choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads grids = math.ceil(chunk_ray_size / threads_in_block) threads = min(ray_chunks[1] - ray_chunks[0], threads_in_block) # Execute the kernel # cuda.profile_start() kernel1D[grids, threads]( d_chunk_payload, d_environment, d_ray_flag, dist_list ) # second_ray_payload[ray_chunks[n]:ray_chunks[n+1]]=d_chunk_payload.copy_to_host() second_ray_payload[ray_chunks[n] : ray_chunks[n + 1]][ "intersect" ] = d_ray_flag.copy_to_host() # second_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=dist_list.copy_to_host() # cuda.profile_stop() # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host() # sync_stream.synchronize() kernel_dt = timer() - raystart menstart = timer() RAYS_CAST = ray_num filtered_network2, final_network2, filtered_index2, final_index2 = rayHits1D( second_ray_payload, io_index2, scatter_inc=1, origins_local=second_origin ) mem_dt = timer() - menstart print( "Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format( prep_dt, kernel_dt, mem_dt ) ) return filtered_network2, final_network2, filtered_index2, final_index2, RAYS_CAST def rayHits1D( ray_payload, point_indexing, scatter_inc=1, origins_local=None, initial_network=None ): """ filtering process for each raycasting stage, seprating rays which hit sinks from non-terminating rays. ---------- ray_payload : TYPE DESCRIPTION. point_indexing : TYPE DESCRIPTION. scatter_inc : TYPE, optional DESCRIPTION. The default is 1. origins_local : TYPE, optional DESCRIPTION. The default is None. initial_network : TYPE, optional DESCRIPTION. The default is None. Returns ------- filtered_network : TYPE DESCRIPTION. final_network : TYPE DESCRIPTION. filtered_index : TYPE DESCRIPTION. final_index : TYPE DESCRIPTION. """ hitmap = ~ray_payload[:]["intersect"] filter_vector = np.ravel( np.nonzero(np.all(np.asarray([hitmap, np.isnan(point_indexing[:, 1])]), axis=0)) ) output_vector = np.ravel( np.nonzero( np.all(np.asarray([hitmap, ~np.isnan(point_indexing[:, 1])]), axis=0) ) ) # final_network = np.zeros( (np.max(np.shape(output_vector)), 3 + (scatter_inc * 3)), dtype=np.float32 ) final_network[:, 0:-3] = origins_local[output_vector, :] final_network[:, -3] = ray_payload[output_vector]["tx"] final_network[:, -2] = ray_payload[output_vector]["ty"] final_network[:, -1] = ray_payload[output_vector]["tz"] # filtered_network = np.zeros( (np.max(np.shape(filter_vector)), 3 + (scatter_inc * 3)), dtype=np.float32 ) filtered_network[:, 0:-3] = origins_local[filter_vector, :] filtered_network[:, -3] = ray_payload[filter_vector]["tx"] filtered_network[:, -2] = ray_payload[filter_vector]["ty"] filtered_network[:, -1] = ray_payload[filter_vector]["tz"] # # final_network=hit_network[output_vector,:] # filtered_network=np.delete(hit_network,output_vector,axis=0) final_index = point_indexing[output_vector, :] filtered_index = point_indexing[filter_vector, :] return filtered_network, final_network, filtered_index, final_index def rotate_view(vis): ctr = vis.get_view_control() ctr.rotate(2.0, 0.0) return False # @njit def create_model_index(source_index, sink_index, scattering_point_index): """ Create an index for the model environment, first sources, then sinks, the scattering points in an uninteruppted chain Thus Parameters ---------- sources : TYPE DESCRIPTION. sinks : TYPE DESCRIPTION. scattering_points : TYPE DESCRIPTION. Returns ------- io_indexing : TYPE DESCRIPTION. """ io_indexing = np.zeros( ( source_index.shape[0] * (sink_index.shape[0] + scattering_point_index.shape[0]), source_index.shape[1] + 1, ), dtype=np.int32, ) idx = 0 for n in np.arange(0, source_index.shape[0]): if scattering_point_index.shape[0] == 0: io_indexing[ np.arange( idx, idx + (sink_index.shape[0] + scattering_point_index.shape[0]) ), -1, ] = sink_index.ravel() else: io_indexing[ np.arange( idx, idx + (sink_index.shape[0] + scattering_point_index.shape[0]) ), -1, ] = np.append(sink_index, scattering_point_index, axis=0).ravel() io_indexing[ np.arange( idx, idx + (sink_index.shape[0] + scattering_point_index.shape[0]) ), 0 : source_index.shape[1], ] = source_index[n, :] idx += sink_index.shape[0] + scattering_point_index.shape[0] # then boolean check and filter to ensure there are no loops, aiming next ray at originating point return io_indexing[np.not_equal(io_indexing[:, -2], io_indexing[:, -1]), :] def workchunkingv2( sources, sinks, scattering_points, environment, max_scatter, line_of_sight=True ): """ :meta private: Raycasting index creation and assignment to raycaster, upper bound is around 4.7e8 rays at a time, there is already chunking to prevent overflow of the GPU memory and timeouts Parameters ---------- sources : n*3 numpy array of float sinks : m*3 numpy array of float scattering_points : o*3 numpy array of float environment : numpy array of triangle_t max_scatter : int line_of_sight : boolean Returns --------- full_index : 2D numpy array of ints the index for all successful rays cast from source coordinates, to any scattering points, to the sink point for each entry RAYS_CAST : int the number of rays cast in this launch. """ # temp function to chunk the number of rays to prevent creation of ray arrays to large for memory # print('WorkChunking Triangles ',len(environment)) RAYS_CAST = 0 raycasting_timestamp = timer() ray_estimate = ( sources.shape[0] * (sinks.shape[0] + scattering_points.shape[0]) ) + ( sources.shape[0] * (scattering_points.shape[0] * sinks.shape[0] * (max_scatter)) ) # print("Total of {:3.1f} rays required".format(ray_estimate)) # Clear GPU memory for simulation cuda.current_context().memory_manager.deallocations.clear() # establish memory limits free_mem, total_mem = cuda.current_context().get_memory_info() max_mem = np.ceil(free_mem * 0.8).astype(np.int64) ray_limit = ( np.floor(np.floor((max_mem - environment.nbytes) / base_types.ray_t.size)) ).astype(np.int64) # establish index boundaries source_index = np.arange(1, sources.shape[0] + 1).reshape( sources.shape[0], 1 ) # index starts at 1, leaving 0 as a null sink_index = np.arange( sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0] ).reshape(sinks.shape[0], 1) scattering_point_index = np.arange( np.max(sink_index) + 1, np.max(sink_index) + 1 + scattering_points.shape[0] ).reshape(scattering_points.shape[0], 1) # implement chunking if max_scatter == 1: full_index = np.empty((0, 2), dtype=np.int32) io_indexing = create_model_index( source_index, sink_index, scattering_point_index ) if io_indexing.shape[0] >= ray_limit: # need to split the array and process seperatly sub_io = np.array_split( io_indexing, np.ceil(io_indexing.shape[0] / ray_limit).astype(int) ) chunknum = len(sub_io) for chunkindex in range(chunknum): _, temp_index, first_wave_Rays = launchRaycaster1Dv3( sources, sinks, scattering_points, sub_io[chunkindex], copy.deepcopy(environment), ) full_index = np.append(full_index, temp_index, axis=0) RAYS_CAST += first_wave_Rays else: _, temp_index, first_wave_Rays = launchRaycaster1Dv3( sources, sinks, scattering_points, io_indexing, copy.deepcopy(environment), ) full_index = np.append(full_index, temp_index, axis=0) RAYS_CAST = first_wave_Rays elif max_scatter == 2: full_index = np.empty((0, 3), dtype=np.int32) if line_of_sight: io_indexing = create_model_index( source_index, sink_index, scattering_point_index ) else: # drop line of sight rays from queue io_indexing = create_model_index( source_index, np.empty((0, 1)), scattering_point_index ) if io_indexing.shape[0] >= ray_limit: # need to split the array and process separately sub_io = np.array_split( io_indexing, np.ceil(io_indexing.shape[0] / ray_limit).astype(int) ) chunknum = len(sub_io) final_index = np.empty((0, 3), dtype=np.int32) RAYS_CAST = 0 for chunkindex in range(chunknum): ( temp_filtered_index, temp_final_index, first_wave_Rays, ) = launchRaycaster1Dv3( sources, sinks, scattering_points, sub_io[chunkindex], copy.deepcopy(environment), ) # filtered_index = np.append(filtered_index, temp_filtered_index, axis=0) # final_index = np.append(final_index, temp_final_index, axis=0) if temp_filtered_index.shape[0] == 0: temp_filtered_index2 = np.empty((0, 3), dtype=np.int32) temp_final_index2 = np.empty((0, 3), dtype=np.int32) second_wave_rays = 0 else: ( temp_filtered_index2, temp_final_index2, second_wave_rays, ) = chunkingRaycaster1Dv3( sources, sinks, scattering_points, temp_filtered_index, environment, terminate_flag=True, ) RAYS_CAST += first_wave_Rays + second_wave_rays # create combined path network host_size = np.shape(temp_final_index) host_padding = np.zeros((host_size[0], 1), dtype=np.int32) temp_full_index = np.append( np.append(temp_final_index, host_padding, axis=1), temp_final_index2, axis=0, ) full_index = np.append(full_index, temp_full_index, axis=0) else: filtered_index, final_index, first_wave_Rays = launchRaycaster1Dv3( sources, sinks, scattering_points, io_indexing, copy.deepcopy(environment), ) if filtered_index.shape[0] == 0: filtered_index2 = np.empty((0, 3), dtype=np.int32) final_index2 = np.empty((0, 3), dtype=np.int32) second_wave_rays = 0 else: filtered_index2, final_index2, second_wave_rays = chunkingRaycaster1Dv3( sources, sinks, scattering_points, filtered_index, environment, terminate_flag=True, ) # cuda.profile_stop() # print('WorkChunkingMaxScatter2 Triangles ', len(environment), 'Filtered Rays Stage 2', len(filtered_index2), 'Final Rays Stage 2',len(final_index2)) start = timer() # filtered_network2,final_network2,filtered_index2,final_index2=rayHits(second_ray_payload,distmap2,io_index2,scatter_inc=2,origins_local=second_origin) RAYS_CAST = first_wave_Rays + second_wave_rays # create combined path network host_size = np.shape(final_index) host_padding = np.zeros((host_size[0], 1), dtype=np.int32) temp_full_index = np.append( np.append(final_index, host_padding, axis=1), final_index2, axis=0 ) full_index = np.append(full_index, temp_full_index, axis=0) elif max_scatter == 3: full_index = np.empty((0, 4), dtype=np.int32) chunknum = np.minimum(sources.shape[0], np.ceil(ray_estimate / 2e8).astype(int)) chunknum = np.maximum(2, chunknum) source_chunking = np.linspace(0, sources.shape[0], chunknum, dtype=np.int32) for chunkindex in range(source_chunking.size - 1): if line_of_sight: io_indexing = create_model_index( source_index[ source_chunking[chunkindex] : source_chunking[chunkindex + 1] ], sink_index, scattering_point_index, ) filtered_index, final_index, first_wave_Rays = launchRaycaster1Dv3( sources, sinks, scattering_points, io_indexing, environment ) else: # drop line of sight rays from queue io_indexing = create_model_index( source_index[ source_chunking[chunkindex] : source_chunking[chunkindex + 1] ], np.empty((0, 1)), scattering_point_index, ) filtered_index, final_index, first_wave_Rays = launchRaycaster1Dv3( sources, sinks, scattering_points, io_indexing, environment ) filtered_index2, final_index2, second_wave_rays = chunkingRaycaster1Dv3( sources, sinks, scattering_points, filtered_index, environment, False ) if filtered_index2.shape[0] * sinks.shape[0] > 2e8: temp_chunks = np.ceil( (filtered_index2.shape[0] * sinks.shape[0]) / 2e8 ).astype(int) temp_chunking = np.linspace( 0, filtered_index2.shape[0], temp_chunks, dtype=np.int32 ) temp_index = np.empty((0, 4), dtype=np.int32) for temp_chunkindex in range(temp_chunking.shape[0] - 1): ( filtered_index3, final_index3_part, third_wave_rays, ) = chunkingRaycaster1Dv3( sources, sinks, scattering_points, filtered_index2[ temp_chunking[temp_chunkindex] : temp_chunking[ temp_chunkindex + 1 ], :, ], environment, True, ) temp_index = np.append(temp_index, final_index3_part, axis=0) final_index3 = temp_index else: filtered_index3, final_index3, third_wave_rays = chunkingRaycaster1Dv3( sources, sinks, scattering_points, filtered_index2, environment, True, ) # cuda.profile_stop() start = timer() # filtered_network2,final_network2,filtered_index2,final_index2=rayHits(second_ray_payload,distmap2,io_index2,scatter_inc=2,origins_local=second_origin) RAYS_CAST = first_wave_Rays + second_wave_rays + third_wave_rays # create combined path network host_size = np.shape(final_index) host_nans = np.zeros((host_size[0], 1), dtype=np.int32) temp_total = np.append( np.append(final_index, host_nans, axis=1), final_index2, axis=0 ) host_size2 = np.shape(temp_total) host_nans2 = np.zeros((host_size2[0], 1), dtype=np.int32) full_index_temp = np.append( np.append(temp_total, host_nans2, axis=1), final_index3, axis=0 ) full_index = np.append(full_index, full_index_temp, axis=0) # process for scattering matrix raycastingduration = raycasting_timestamp - timer() # print("Raycasting Duration: {:3.1f} s, Total Rays: {:3.1f}".format(raycastingduration,RAYS_CAST) ) return full_index, RAYS_CAST def create_scatter_index( source_index, sink_index, scattering_point_index, scattering_mask ): # create target index for given scattering depth target_index = np.empty((0, np.max(scattering_mask) + 2), dtype=np.int32) # temp_index=create_model_index(source_index, sink_index, scattering_point_index) # stop_index=temp_index[np.isin(temp_index[:,-1],sink_index),:] # start_index=temp_index[~np.isin(temp_index[:,-1],sink_index),:] # final_index=create_model_index(start_index, sink_index, scattering_point_index) # target_index=np.append(np.append(stop_index,np.zeros((stop_index.shape[0],1),dtype=np.int32),axis=1),final_index,axis=0) for launch_round in range(np.max(scattering_mask) + 1): # create list of allowed points which always has all sinks. if launch_round == 0: allowed_illuminators = source_index # allowed_points=np.append(np.where(scattering_mask>=(launch_round+1))[0]+1,(np.where(scattering_mask==0)[0]+1)) # allowed_points=copy.deepcopy(sink_index).ravel() allowed_scatter_points = ( np.where(scattering_mask >= (launch_round + 1))[0] + 1 ) temp_index = create_model_index( allowed_illuminators, sink_index, allowed_scatter_points.reshape(allowed_scatter_points.shape[0], 1), ) target_index = np.append( target_index, np.append( temp_index, np.zeros( ( temp_index.shape[0], target_index.shape[1] - temp_index.shape[1], ), dtype=np.int32, ), axis=1, ), axis=0, ) else: allowed_illuminators = ( np.where(scattering_mask >= (launch_round + 1))[0] + 1 ) # allowed_points=np.append(np.where(scattering_mask>=(launch_round+1))[0]+1,(np.where(scattering_mask==0)[0]+1)) allowed_scatter_points = scattering_point_index propagate_index = np.where( target_index[:, launch_round] > np.max(sink_index) )[0] drain_trim = np.delete( propagate_index, np.isin( target_index[propagate_index, launch_round], allowed_illuminators ), ) drain_temp = create_model_index( target_index[drain_trim, 0 : launch_round + 1], sink_index, np.empty((0)), ) illuminator_index = propagate_index[ np.isin( target_index[propagate_index, launch_round], allowed_illuminators ) ] forward_temp = create_model_index( target_index[illuminator_index, 0 : launch_round + 1], sink_index, allowed_scatter_points.reshape(allowed_scatter_points.shape[0], 1), ) trimmed_index = np.delete(target_index, propagate_index, axis=0) target_index = np.append( trimmed_index, np.append( np.append(drain_temp, forward_temp, axis=0), np.zeros( ( drain_temp.shape[0] + forward_temp.shape[0], target_index.shape[1] - drain_temp.shape[1], ), dtype=np.int32, ), axis=1, ), axis=0, ) # so far this return target_index def integratedraycastersetup( sources, sinks, scattering_points, environment, scattering_mask ): # temp function to chunk the number of rays to prevent creation of ray arrays to large for memory raycasting_timestamp = timer() max_scatter = np.max(scattering_mask) if max_scatter == 0: ray_estimate = sources * sinks elif max_scatter == 1: ray_estimate = ( sources * (sinks + scattering_points.shape[0]) + sources * scattering_points.shape[0] * sinks ) elif max_scatter == 2: ray_estimate = ( sources * (sinks + scattering_points.shape[0]) + sources * scattering_points.shape[0] * sinks + sources * scattering_points.shape[0] * (scattering_points.shape[0]) * sinks ) # print("Total of {:3.1f} rays required".format(ray_estimate)) # establish index boundaries source_index = np.arange(1, sources + 1).reshape( sources, 1 ) # index starts at 1, leaving 0 as a null sink_index = np.arange(sources + 1, sources + 1 + sinks).reshape(sinks, 1) scattering_point_index = np.arange( np.max(sink_index) + 1, np.max(sink_index) + 1 + (scattering_points.shape[0] - sources - sinks), ).reshape((scattering_points.shape[0] - sources - sinks), 1) # implement chunking full_index = np.empty((0, np.max(scattering_mask) + 2), dtype=np.int32) chunknum = np.minimum(sources, np.ceil(ray_estimate / 2e8).astype(int)) chunknum = np.maximum(2, chunknum) source_chunking = np.linspace(0, sources, chunknum, dtype=np.int32) for chunkindex in range(source_chunking.size - 1): io_indexing = create_scatter_index( source_index[source_chunking[chunkindex] : source_chunking[chunkindex + 1]], sink_index, scattering_point_index, scattering_mask, ) # io_indexing=create_model_index(source_index[source_chunking[chunkindex]:source_chunking[chunkindex+1]],sink_index,np.empty((0,1),dtype=np.int32)) full_index = np.append( full_index, integratedRaycaster(io_indexing, scattering_points, environment), axis=0, ) # full_index=create_scatter_index(source_index,sink_index,scattering_point_index,scattering_mask) raycastingduration = timer() - raycasting_timestamp # print("Raycasting Duration: {:3.1f} s, Total Rays: {:3.1f}".format(raycastingduration,full_index.shape[0]) ) return full_index, io_indexing if __name__ == "__main__": print("this is a library")