Source code for lyceanem.raycasting.rayfunctions

# -*- coding: utf-8 -*-
import copy
import math
from timeit import default_timer as timer

import numba as nb
import numpy as np
import open3d as o3d
import scipy.stats
from matplotlib import cm
from numba import cuda, float32, jit, njit, guvectorize, prange
from numpy.linalg import norm
from scipy.spatial import distance

import lyceanem.base_types as base_types
from ..utility import math_functions as math_functions
import lyceanem.electromagnetics.empropagation as EM

EPSILON = 1e-6  # how close to zero do we consider zero? example used 1e-7

# # A numpy record array (like a struct) to record triangle
# point_data = np.dtype([
#     # conductivity, permittivity and permiability
#     #free space values should be
#     #permittivity of free space 8.8541878176e-12F/m
#     #permeability of free space 1.25663706212e-6H/m
#     ('permittivity', 'c8'), ('permeability', 'c8'),
#     #electric or magnetic current sources? E if True
#     ('Electric', '?'),
#     ], align=True)
# point_t = from_dtype(point_data) # Create a type that numba can recognize!
#
# # A numpy record array (like a struct) to record triangle
# triangle_data = np.dtype([
#     # v0 data
#     ('v0x', 'f4'), ('v0y', 'f4'), ('v0z', 'f4'),
#     # v1 data
#     ('v1x', 'f4'),  ('v1y', 'f4'), ('v1z', 'f4'),
#     # v2 data
#     ('v2x', 'f4'),  ('v2y', 'f4'), ('v2z', 'f4'),
#     # normal vector
#     #('normx', 'f4'),  ('normy', 'f4'), ('normz', 'f4'),
#     # ('reflection', np.float64),
#     # ('diffuse_c', np.float64),
#     # ('specular_c', np.float64),
#     ], align=True)
# triangle_t = from_dtype(triangle_data) # Create a type that numba can recognize!
#
# # ray class, to hold the ray origin, direction, and eventuall other data.
# ray_data=np.dtype([
#     #origin data
#     ('ox','f4'),('oy','f4'),('oz','f4'),
#     #direction vector
#     ('dx','f4'),('dy','f4'),('dz','f4'),
#     #target
#     #direction vector
#     #('tx','f4'),('ty','f4'),('tz','f4'),
#     #distance traveled
#     ('dist','f4'),
#     #intersection
#     ('intersect','?'),
#     ],align=True)
# ray_t = from_dtype(ray_data) # Create a type that numba can recognize!
# # We can use that type in our device functions and later the kernel!
#
# scattering_point = np.dtype([
#     #position data
#     ('px', 'f4'), ('py', 'f4'), ('pz', 'f4'),
#     #velocity
#     ('vx', 'f4'), ('vy', 'f4'), ('vz', 'f4'),
#     #normal
#     ('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'),
#     #weights
#     ('ex','c8'),('ey','c8'),('ez','c8'),
#     # conductivity, permittivity and permiability
#     #free space values should be
#     #permittivity of free space 8.8541878176e-12F/m
#     #permeability of free space 1.25663706212e-6H/m
#     ('permittivity', 'c8'), ('permeability', 'c8'),
#     #electric or magnetic current sources? E if True
#     ('Electric', '?'),
#     ], align=True)
# scattering_t = from_dtype(scattering_point) # Create a type that numba can recognize!

#
# @njit
# def cart2pol(x, y):
#     rho = np.sqrt(x ** 2 + y ** 2)
#     phi = np.arctan2(y, x)
#     return (rho, phi)
#
#
# @njit
# def pol2cart(rho, phi):
#     x = rho * np.cos(phi)
#     y = rho * np.sin(phi)
#     return (x, y)
#
#
# @njit
# def cart2sph(x, y, z):
#     # radians
#     hxy = np.hypot(x, y)
#     r = np.hypot(hxy, z)
#     el = np.arctan2(z, hxy)
#     az = np.arctan2(y, x)
#     return az, el, r
#
#
# @njit
# def sph2cart(az, el, r):
#     # radians
#     rcos_theta = r * np.cos(el)
#     x = rcos_theta * np.cos(az)
#     y = rcos_theta * np.sin(az)
#     z = r * np.sin(el)
#     return x, y, z
#
#
# @njit
# def calc_normals(T):
#     # calculate triangle norm
#     e1x = T["v1x"] - T["v0x"]
#     e1y = T["v1y"] - T["v0y"]
#     e1z = T["v1z"] - T["v0z"]
#
#     e2x = T["v2x"] - T["v0x"]
#     e2y = T["v2y"] - T["v0y"]
#     e2z = T["v2z"] - T["v0z"]
#
#     dirx = e1y * e2z - e1z * e2y
#     diry = e1z * e2x - e1x * e2z
#     dirz = e1x * e2y - e1y * e2x
#
#     normconst = math.sqrt(dirx ** 2 + diry ** 2 + dirz ** 2)
#
#     T["normx"] = dirx / normconst
#     T["normy"] = diry / normconst
#     T["normz"] = dirz / normconst
#
#     return T
#
#
# @guvectorize(
#     [(float32[:], float32[:], float32[:], float32)],
#     "(n),(n)->(n),()",
#     target="parallel",
# )
# def fast_calc_dv(source, target, dv, normconst):
#     dirx = target[0] - source[0]
#     diry = target[1] - source[1]
#     dirz = target[2] - source[2]
#     normconst = math.sqrt(dirx ** 2 + diry ** 2 + dirz ** 2)
#     dv = np.array([dirx, diry, dirz]) / normconst
#
#
# @njit
# def calc_dv(source, target):
#     dirx = target[0] - source[0]
#     diry = target[1] - source[1]
#     dirz = target[2] - source[2]
#     normconst = np.sqrt(dirx ** 2 + diry ** 2 + dirz ** 2)
#     dv = np.array([dirx, diry, dirz]) / normconst
#     return dv[0], dv[1], dv[2], normconst


# @njit
# def calc_dv_norm(source,target,direction,length):
#    length[:,0]=np.sqrt((target[:,0]-source[:,0])**2+(target[:,1]-source[:,1])**2+(target[:,2]-source[:,2])**2)
#    direction=(target-source)/length
#    return direction, length


@cuda.jit(device=True)
def dot(ax1, ay1, az1, ax2, ay2, az2):
    result = ax1 * ax2 + ay1 * ay2 + az1 * az2
    return result


@cuda.jit(device=True)
def cross(ax1, ay1, az1, ax2, ay2, az2):
    rx = ay1 * az2 - az1 * ay2
    ry = az1 * ax2 - ax1 * az2
    rz = ax1 * ay2 - ay1 * ax2
    return rx, ry, rz


# @cuda.jit(boolean,float32(ray_t, triangle_t), device=True, inline=False)
@cuda.jit(device=True, inline=False)
def hit(ray, triangle):
    """ Compute compute whether the defined ray will intersect with the defined triangle
    using the  Möller–Trumbore ray-triangle intersection algorithm
    """
    # find edge vectors
    e1x = triangle.v1x - triangle.v0x
    e1y = triangle.v1y - triangle.v0y
    e1z = triangle.v1z - triangle.v0z

    e2x = triangle.v2x - triangle.v0x
    e2y = triangle.v2y - triangle.v0y
    e2z = triangle.v2z - triangle.v0z
    # calculate determinant cross product of DV and E2
    pvecx, pvecy, pvecz = cross(ray.dx, ray.dy, ray.dz, e2x, e2y, e2z)
    # pvecx=ray.dy*e2z-ray.dz*e2y
    # pvecy=ray.dz*e2x-ray.dx*e2z
    # pvecz=ray.dx*e2y-ray.dy*e2x
    # determinant is dot product of edge 1 and pvec
    A = dot(pvecx, pvecy, pvecz, e1x, e1y, e1z)
    # if A is near zero, then ray lies in the plane of the triangle
    if -EPSILON < A < EPSILON:
        # print('miss')
        return False, math.inf

    # if A is less than zero, then the ray is coming from behind the triangle
    # cull backface triangles
    if A < 0:
        return False, math.inf
    # calculate distance from vertice 0 to ray origin
    tvecx = ray.ox - triangle.v0x  # s
    tvecy = ray.oy - triangle.v0y  # s
    tvecz = ray.oz - triangle.v0z  # s
    # inverse determinant and calculate bounds of triangle
    F = 1.0 / A
    U = F * dot(tvecx, tvecy, tvecz, pvecx, pvecy, pvecz)
    # U=F*(tvecx*pvecx+tvecy*pvecy+tvecz*pvecz)
    # print('U',U)
    if U < 0.0 or U > (1.0):
        # in U coordinates, if U is less than 0 or greater than 1, it is outside the triangle
        # print('miss')
        return False, math.inf

    # cross product of tvec and E1
    qx, qy, qz = cross(tvecx, tvecy, tvecz, e1x, e1y, e1z)
    # qx=tvecy*e1z-tvecz*e1y
    # qy=tvecz*e1x-tvecx*e1z
    # qz=tvecx*e1y-tvecy*e1x
    V = F * dot(ray.dx, ray.dy, ray.dz, qx, qy, qz)
    # print('V,V+U',V,V+U)
    # V=F*(ray.dx*qx+ray.dy*qy+ray.dz*qz)
    if V < 0.0 or (U + V) > (1.0):
        # in UV coordinates, intersection is within triangle
        # print('miss')
        return False, math.inf

    # intersect_distance = F*(e2x*qx + e2y*qy + e2z*qz)
    intersect_distance = F * dot(e2x, e2y, e2z, qx, qy, qz)
    # print('dist',intersect_distance)
    # if (intersect_distance>EPSILON and intersect_distance<(1-EPSILON)):
    if (intersect_distance > (2 * EPSILON)) and (
        intersect_distance < (ray.dist - (2 * EPSILON))
    ):
        # intersection on triangle
        # print('hit')
        return True, intersect_distance

    return False, math.inf


@cuda.jit
def integratedkernal(ray_index, point_information, environment, ray_flag):
    cu_ray_num = cuda.grid(1)  # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),

    if cu_ray_num < ray_index.shape[0]:
        # there are rays to process but must create and launch on the spot, then if succesful go to the next one until the index has been traversed
        # print(ray_index[cu_ray_num,0],ray_index[cu_ray_num,1],ray_index[cu_ray_num,2])
        # ray_components[cu_ray_num,:]=0.0
        # print(scattering_matrix.shape[0],scattering_matrix.shape[1])
        for i in range(ray_index.shape[1] - 1):
            # print('integrated ray',cu_ray_num)
            if ray_index[cu_ray_num, i + 1] != 0:
                #     #print(i,cu_ray_num,network_index[cu_ray_num,i],network_index[cu_ray_num,i+1])
                #     #convert source point field to ray

                ray = cuda.local.array(shape=(1), dtype=base_types.ray_t)
                point1 = point_information[ray_index[cu_ray_num, i] - 1]
                point2 = point_information[ray_index[cu_ray_num, i + 1] - 1]
                ray[0]["ox"] = point1["px"]
                ray[0]["oy"] = point1["py"]
                ray[0]["oz"] = point1["pz"]
                normconst = math.sqrt(
                    (point2["px"] - point1["px"]) ** 2
                    + (point2["py"] - point1["py"]) ** 2
                    + (point2["pz"] - point1["pz"]) ** 2
                )
                ray[0]["dx"] = (point2["px"] - point1["px"]) / normconst
                ray[0]["dy"] = (point2["py"] - point1["py"]) / normconst
                ray[0]["dz"] = (point2["pz"] - point1["pz"]) / normconst
                ray[0]["dist"] = normconst
                ray[0]["intersect"] = False
                # ray=rayprep(point1,point2,ray)
                # print('integrated ray',cu_ray_num,i,normconst)
                for tri_inc in range(len(environment)):
                    hit_bool, dist_temp = hit(ray[0], environment[tri_inc])
                    if hit_bool and (dist_temp < ray[0]["dist"]):
                        # hit something, so break out of loop and null index the first entry of the ray_index
                        ray[0]["dist"] = dist_temp
                        ray[0]["intersect"] = True

            # if ray[0]['intersect']:
            #    break

        ray_flag[cu_ray_num] = ray[0]["intersect"]
        # print('integrated',cu_ray_num,i)


@cuda.jit
def kernel1D(rays, environment, ray_flag, distances):

    cu_ray_num = cuda.grid(1)  # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),
    #           threadIdx.y + ( blockIdx.y * blockDim.y )
    # margin=1e-5
    dist_min = math.inf
    intersect_check = False
    max_tri_num = len(environment)
    i = 0  # emulate a C-style for-loop, exposing the idx increment logic
    while i < max_tri_num:
        hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
        i += 1
        if hit_bool and (dist_temp < dist_min):
            # hit something
            intersect_check = True
            dist_min = dist_temp

    ray_flag[cu_ray_num] = intersect_check
    distances[cu_ray_num] = dist_min


@cuda.jit
def kernel1Dv2(rays, environment):

    cu_ray_num = cuda.grid(1)  # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),
    #           threadIdx.y + ( blockIdx.y * blockDim.y )
    # margin=1e-5
    # dist_min=math.inf
    # intersect_check=False
    max_tri_num = len(environment)
    i = 0  # emulate a C-style for-loop, exposing the idx increment logic
    while i < max_tri_num:
        hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
        i += 1
        # if hit_bool:
        #    print(cu_ray_num)
        # if (cu_ray_num==5):
        #    #if (i==58):
        #    hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
        #    print(i,dist_temp)

        if hit_bool and (dist_temp < rays[cu_ray_num]["dist"]):
            # hit something
            # print(cu_ray_num,i,dist_temp,'hit')
            # dist_min=dist_temp
            rays[cu_ray_num]["dist"] = dist_temp
            rays[cu_ray_num]["intersect"] = True
            # ray_flag[cu_ray_num]=True

    # if rays[cu_ray_num]['intersect']==False:
    #    print(cu_ray_num,i,dist_temp,'miss')

    nb.cuda.syncthreads()
    # if (rays[cu_ray_num]['intersect']==True):
    #    print(cu_ray_num,rays[cu_ray_num]['dist'])
    # if (cu_ray_num==1):
    #    print(i)


@cuda.jit
def kernel1Dv3(rays, environment):

    cu_ray_num = cuda.grid(1)  # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),

    if (
        cu_ray_num < rays.shape[0]
    ):  #           threadIdx.y + ( blockIdx.y * blockDim.y )
        # margin=1e-5
        # dist_min=math.inf
        # intersect_check=False
        max_tri_num = len(environment)
        i = 0  # emulate a C-style for-loop, exposing the idx increment logic
        while i < max_tri_num:
            hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
            i += 1
            # if hit_bool:
            #    print(cu_ray_num)
            # if (cu_ray_num==5):
            #    #if (i==58):
            #    hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
            #    print(i,dist_temp)
            if hit_bool and (dist_temp < rays[cu_ray_num]["dist"]):
                # hit something
                # print(cu_ray_num,i,dist_temp,'hit')
                # dist_min=dist_temp
                # print('conventional',cu_ray_num,i,dist_temp,rays[cu_ray_num]['dist'])
                rays[cu_ray_num]["dist"] = dist_temp
                rays[cu_ray_num]["intersect"] = True

                # ray_flag[cu_ray_num]=True

        # print('conventional',cu_ray_num,dist_temp)
        nb.cuda.syncthreads()


def integratedRaycaster(ray_index, scattering_points, environment_local):
    start = timer()

    prep_dt = timer() - start
    raystart = timer()
    # Create a container for the pixel RGBA information of our image
    chunk_size = 2 ** 11
    threads_in_block = 1024
    # for idx in range(len(triangle_chunk)-1):
    d_environment = cuda.device_array(len(environment_local), dtype=base_types.triangle_t)
    d_ray_index = cuda.device_array(
        (ray_index.shape[0], ray_index.shape[1]), dtype=np.int32
    )
    d_ray_index = cuda.to_device(ray_index)
    ray_flags = np.full(ray_index.shape[0], False, dtype=np.bool)
    d_ray_flag = cuda.device_array(ray_index.shape[0], dtype=np.bool)
    d_ray_flag = cuda.to_device(ray_flags)
    cuda.to_device(environment_local, to=d_environment)
    d_point_information = cuda.device_array(
        scattering_points.shape[0], dtype=base_types.scattering_t
    )
    d_point_information = cuda.to_device(scattering_points)
    grids = math.ceil(ray_index.shape[0] / threads_in_block)
    threads = threads_in_block
    # Execute the kernel
    # cuda.profile_start()
    # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
    integratedkernal[grids, threads](
        d_ray_index, d_point_information, d_environment, d_ray_flag
    )
    # cuda.profile_stop()
    # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
    propagation_index = d_ray_index.copy_to_host()
    ray_flags = d_ray_flag.copy_to_host()
    kernel_dt = timer() - raystart
    start = timer()

    mem_dt = timer() - start
    # deallocate memory on gpu
    #ctx = cuda.current_context()
    #deallocs = ctx.deallocations
    #deallocs.clear()
    # final_index=np.delete(propagation_index,np.where(propagation_index[:,0]==0),axis=0)
    final_index = np.delete(ray_index, ray_flags, axis=0)

    return final_index


[docs]def visiblespace( source_coords, source_normals, environment, vertex_area=0, az_range=np.linspace(-180.0, 180.0, 19), elev_range=np.linspace(-90.0, 90.0, 19), shell_range=0.5, ): """ Visiblespace generates a matrix stack of visible space for each element, indexed by source_coordinate. Parameters -------------- source_coords : n by 3 numpy array of floats xyz coordinates of the sources source_normals : n by 3 numpy array of floats normal vectors for each source point environment : :class:`lyceanem.base_classes.triangles` blocking environment vertex_area : float or array of floats the area associated with each source point, defaults to 0, but can also be specified for each source az_range : array of float array of azimuth planes in degrees elev_range : array of float array of elevation points in degrees shell_range: float radius of point cloud shell Returns ------------------- visible_patterns : m by l by n array of floats 3D antenna patterns resultant_pcd : open3d pointcloud colour data to scale the points fractional visibility from the source aperture """ azaz, elel = np.meshgrid(az_range, elev_range) sourcenum = len(source_coords) sinks = np.zeros((len(np.ravel(azaz)), 3), dtype=np.float32) sinks[:, 0], sinks[:, 1], sinks[:, 2] = azeltocart( np.ravel(azaz), np.ravel(elel), 1e9 ) sinknum = len(sinks) # # initial_index=np.full((len(source_coords),2),np.nan,dtype=np.int32) # initial_index[:,0]=np.arange(len(source_coords)) # pointingindex=np.reshape(np.arange(0,len(sinks)),(len(az_range),len(elev_range))) # need to create farfield sinks in az,elev coordinates, then convert to xyz sink coordinates, and generate index # missed_points,hit_points,missed_index,hit_index,shadow_rays=chunkingRaycaster1D(source_coords,sinks,np.zeros((1,3),dtype=np.float32),initial_index,environment,1,terminate_flag=True) hit_index, _ = workchunkingv2( source_coords, sinks, np.empty((0, 3), dtype=np.float32), environment, 1 ) unified_model = np.append( source_coords.astype(np.float32), sinks.astype(np.float32), axis=0 ) # filtered_network2,final_network2,filtered_index2,final_index2,shadow_rays directions = np.zeros((len(hit_index), 3), dtype=np.float32) norm_length = np.zeros((len(hit_index), 1), dtype=np.float32) hit_directions, hit_norms = math_functions.calc_dv_norm( unified_model[hit_index[:, 0].astype(int) - 1, :], unified_model[hit_index[:, 1].astype(int) - 1, :], directions, norm_length, ) # angles=Angular_distance(hit_directions,source_normals[hit_index[:,0].astype(int)-1,:].astype(np.float32)) angles = np.zeros((hit_directions.shape[0], 1), dtype=np.float32) angles = angle_pop( hit_directions, source_normals[hit_index[:, 0].astype(int) - 1, :].astype(np.float32), angles, ) # angles[np.isnan(angles)]=0 # visible_patterns=quickpatterncreator(az_range,elev_range,source_coords,angles,vertex_area,hit_index) if len(vertex_area) == 1: if vertex_area == 0: portion = np.zeros((len(angles), 1), dtype=np.float32) portion[:] = 1 else: portion = vertex_area * np.abs(np.cos(angles)) portion[portion < 0] = 0 else: portion = np.ravel(vertex_area[hit_index[:, 0].astype(int) - 1]) * np.abs( np.ravel(np.cos(angles)) ) portion[np.where(np.abs(angles) > (np.pi / 2))[0]] = 0.0 visible_patterns = np.empty((len(az_range) * len(elev_range)), dtype=np.float32) visible_patterns[:] = 0 visible_patterns = patternsort( visible_patterns, sourcenum, sinknum, portion, hit_index ).reshape(len(elev_range), len(az_range)) shell_coords = np.zeros((len(np.ravel(azaz)), 3), dtype=np.float32) shell_coords[:, 0], shell_coords[:, 1], 
shell_coords[:, 2] = azeltocart( np.ravel(azaz), np.ravel(elel), shell_range ) maxarea = np.nanmax(np.nanmax(visible_patterns)) resultant_pcd = patterntocloud( np.reshape(visible_patterns, (len(az_range) * len(elev_range), 1)), shell_coords, maxarea, ) return visible_patterns, resultant_pcd
@njit(cache=True, nogil=True) def unit_vector(vector): """ Returns the unit vector of the vector. """ return vector / np.linalg.norm(vector) @njit(cache=True, nogil=True) def angle(vector1, vector2): """ Returns the angle in radians between given vectors""" v1_u = unit_vector(vector1) v2_u = unit_vector(vector2) minor = np.linalg.det(np.stack((v1_u[-2:], v2_u[-2:]))) if minor == 0: sign = 1 else: sign = -np.sign(minor) dot_p = np.dot(v1_u, v2_u) dot_p = min(max(dot_p, -1.0), 1.0) return sign * np.arccos(dot_p) @njit(parallel=True) def angle_pop(hit_directions, source_normals, angles): # parallised angle calculation for angle_inc in prange(len(angles)): angles[angle_inc] = angle( hit_directions[angle_inc, :], source_normals[angle_inc, :] ) return angles @njit(parallel=True) def patternsort(visible_patterns, sourcenum, sinknum, portion, hit_index): # faster method of summing the array contributions to each farfield point for n in range(sinknum): visible_patterns[n] = np.sum(portion[hit_index[:, 1] - 1 - sourcenum == n]) return visible_patterns def patterncreator(az_range, elev_range, source_coords, pointingindex, hit_index): # creates visibility pattern from azimuth and elevation ranges visible_patterns = np.empty( (len(az_range), len(elev_range), len(source_coords)), dtype=np.int32 ) visible_patterns[:, :, :] = 0 for n in range(len(az_range)): for m in range(len(elev_range)): for element in range(len(source_coords)): if np.any( np.all( np.asarray( [ hit_index[:, 0] == element, hit_index[:, 1] == pointingindex[n, m], ] ), axis=0, ) ): visible_patterns[n, m, element] = 1 return visible_patterns def quickpatterncreator( az_range, elev_range, source_coords, angles, vertex_area, hit_index ): # creates visibility pattern from azimuth and elevation ranges visible_patterns = np.empty((len(az_range), len(elev_range)), dtype=np.float32) visible_patterns[:, :] = 0 pointingindex = np.reshape( np.arange(0, len(az_range) * len(elev_range)), (len(az_range), len(elev_range)) ) sourcenum = len(source_coords) if vertex_area == 0: for n in range(len(az_range)): for m in range(len(elev_range)): visible_patterns[n, m] = np.sum( hit_index[:, 1] - sourcenum - 1 == pointingindex[n, m] ) else: portion = vertex_area * np.cos(angles) portion[portion < 0] = 0 for n in range(len(az_range)): for m in range(len(elev_range)): visible_patterns[n, m] = np.sum( portion[hit_index[:, 1] - sourcenum - 1 == pointingindex[n, m]] ) return visible_patterns # @njit def patterntocloud(pattern_data, shell_coords, maxarea): # takes the pattern_data and shell_coordinates, and creates an open3d point cloud based upon the data. 
point_cloud = points2pointcloud(shell_coords) points, elements = np.shape(pattern_data) # normdata viridis = cm.get_cmap("viridis", 40) visible_elements = np.sum(pattern_data / maxarea, axis=1) np_colors = viridis(visible_elements) point_cloud.colors = o3d.utility.Vector3dVector(np_colors[:, 0:3]) return point_cloud def azeltocart(az_data, el_data, radius): # convert from az,el and radius data to xyz x_data = radius * np.cos(np.deg2rad(el_data)) * np.cos(np.deg2rad(az_data)) y_data = radius * np.cos(np.deg2rad(el_data)) * np.sin(np.deg2rad(az_data)) z_data = radius * np.sin(np.deg2rad(el_data)) return x_data, y_data, z_data def convertTriangles(triangle_object): """ convert o3d triangle object to ray tracer triangle class """ if triangle_object == None: triangles = np.empty(0, dtype=base_types.triangle_t) else: vertices = np.asarray(triangle_object.vertices) tri_index = np.asarray(triangle_object.triangles) normals = np.asarray(triangle_object.triangle_normals) triangles = np.empty(len(tri_index), dtype=base_types.triangle_t) for idx in range(len(tri_index)): triangles[idx]["v0x"] = np.single(vertices[tri_index[idx, 0], 0]) triangles[idx]["v0y"] = np.single(vertices[tri_index[idx, 0], 1]) triangles[idx]["v0z"] = np.single(vertices[tri_index[idx, 0], 2]) triangles[idx]["v1x"] = np.single(vertices[tri_index[idx, 1], 0]) triangles[idx]["v1y"] = np.single(vertices[tri_index[idx, 1], 1]) triangles[idx]["v1z"] = np.single(vertices[tri_index[idx, 1], 2]) triangles[idx]["v2x"] = np.single(vertices[tri_index[idx, 2], 0]) triangles[idx]["v2y"] = np.single(vertices[tri_index[idx, 2], 1]) triangles[idx]["v2z"] = np.single(vertices[tri_index[idx, 2], 2]) # triangles[idx]['normx']=np.single(normals[idx,0]) # triangles[idx]['normy']=np.single(normals[idx,1]) # triangles[idx]['normz']=np.single(normals[idx,2]) return triangles def pick_points(pcd): """ test function based on open3d example to pick points, can be used as the basis of a selection function to pick vertices. """ print("") print("1) Please pick at least three correspondences using [shift + left click]") print(" Press [shift + right click] to undo point picking") print("2) Afther picking points, press q for close the window") vis = o3d.visualization.VisualizerWithEditing() vis.create_window() vis.add_geometry(pcd) vis.run() # user picks points vis.destroy_window() print("") return vis.get_picked_points() def points2pointcloud(xyz): """ turns numpy array of xyz data into a o3d format point cloud Parameters ---------- xyz : TYPE DESCRIPTION. Returns ------- pcd : point cloud format """ pcd = o3d.geometry.PointCloud() if xyz.shape[1] == 3: pcd.points = o3d.utility.Vector3dVector(xyz) else: reshaped = xyz.reshape((int(len(xyz.ravel()) / 3), 3)) pcd.points = o3d.utility.Vector3dVector(reshaped) return pcd @guvectorize([(float32[:, :], float32[:])], "(m,n)->(m)", target="parallel") def PathLength(partial_network, path_length): """ for each row in the partial_network, calculate the distance travelled from the origin to the sink point # Parameters ---------- partial_network : n by (3*m) array of coordinates # Returns ------- path_lengths : n by 1 array of path lengths between each point. 
# """ if partial_network.shape[1] == 6: for i in range(partial_network.shape[0]): path_length[i] = np.sqrt( (partial_network[i, 3] - partial_network[i, 0]) ** 2 + (partial_network[i, 4] - partial_network[i, 1]) ** 2 + (partial_network[i, 5] - partial_network[i, 2]) ** 2 ) elif partial_network.shape[1] == 9: for i in range(partial_network.shape[0]): path_length[i] = np.sqrt( (partial_network[i, 3] - partial_network[i, 0]) ** 2 + (partial_network[i, 4] - partial_network[i, 1]) ** 2 + (partial_network[i, 5] - partial_network[i, 2]) ** 2 ) + np.sqrt( (partial_network[i, 6] - partial_network[i, 3]) ** 2 + (partial_network[i, 7] - partial_network[i, 4]) ** 2 + (partial_network[i, 8] - partial_network[i, 5]) ** 2 ) elif partial_network.shape[1] == 12: for i in range(partial_network.shape[0]): path_length[i] = ( np.sqrt( (partial_network[i, 3] - partial_network[i, 0]) ** 2 + (partial_network[i, 4] - partial_network[i, 1]) ** 2 + (partial_network[i, 5] - partial_network[i, 2]) ** 2 ) + np.sqrt( (partial_network[i, 6] - partial_network[i, 3]) ** 2 + (partial_network[i, 7] - partial_network[i, 4]) ** 2 + (partial_network[i, 8] - partial_network[i, 5]) ** 2 ) + np.sqrt( (partial_network[i, 9] - partial_network[i, 6]) ** 2 + (partial_network[i, 10] - partial_network[i, 7]) ** 2 + (partial_network[i, 11] - partial_network[i, 8]) ** 2 ) ) # @guvectorize([(int32[:,:], float32[:,:], float32[:,:], float32[:,:], float32, complex64[:,:])], '(m,n)->(m,n)',target='parallel') @njit(parallel=True, nogil=True, cache=True) def ScatteringNetworkGenerator( network_index, scattering_points, scattering_normals, scattering_weights, point_information, scattering_coefficient, wavelength, local_channel, ): """ for each row in the partial_network, calculate the distance travelled from the origin to the sink point # Parameters ---------- partial_network : n by (3*m) array of coordinates # Returns ------- local_channel : n by 3 array of electric field vectors at each sink # """ if network_index.shape[1] == 2: for i in prange(network_index.shape[0]): local_channel[i, :] = EM.losChannel( scattering_points[network_index[i, 0] - 1, :], scattering_normals[network_index[i, 0] - 1, :], scattering_weights[network_index[i, 0] - 1, :], point_information[network_index[i, 0] - 1], scattering_points[network_index[i, 1] - 1, :], scattering_normals[network_index[i, 1] - 1, :], scattering_weights[network_index[i, 1] - 1, :], point_information[network_index[i, 1] - 1], scattering_coefficient, wavelength, ) elif network_index.shape[1] == 3: for i in prange(network_index.shape[0]): local_channel[i, :] = EM.losplus1Channel( scattering_points[network_index[i, 0] - 1, :], scattering_normals[network_index[i, 0] - 1, :], scattering_weights[network_index[i, 0] - 1, :], point_information[network_index[i, 0] - 1], scattering_points[network_index[i, 1] - 1, :], scattering_normals[network_index[i, 1] - 1, :], scattering_weights[network_index[i, 1] - 1, :], point_information[network_index[i, 1] - 1], scattering_points[network_index[i, 2] - 1, :], scattering_normals[network_index[i, 2] - 1, :], scattering_weights[network_index[i, 2] - 1, :], point_information[network_index[i, 2] - 1], scattering_coefficient, wavelength, ) elif network_index.shape[1] == 4: for i in prange(network_index.shape[0]): local_channel[i, :] = EM.losplus2Channel( scattering_points[network_index[i, 0] - 1, :], scattering_normals[network_index[i, 0] - 1, :], scattering_weights[network_index[i, 0] - 1, :], point_information[network_index[i, 0] - 1], scattering_points[network_index[i, 
1] - 1, :], scattering_normals[network_index[i, 1] - 1, :], scattering_weights[network_index[i, 1] - 1, :], point_information[network_index[i, 1] - 1], scattering_points[network_index[i, 2] - 1, :], scattering_normals[network_index[i, 2] - 1, :], scattering_weights[network_index[i, 2] - 1, :], point_information[network_index[i, 2] - 1], scattering_points[network_index[i, 3] - 1, :], scattering_normals[network_index[i, 3] - 1, :], scattering_weights[network_index[i, 3] - 1, :], point_information[network_index[i, 3] - 1], scattering_coefficient, wavelength, ) return local_channel @njit(parallel=True, nogil=True, cache=True) def ScatteringNetworkGeneratorTime( network_index, scattering_points, scattering_normals, scattering_weights, point_information, wavelength, local_channel, times, ): """ for each row in the partial_network, calculate the distance travelled from the origin to the sink point # Parameters ---------- partial_network : n by (3*m) array of coordinates # Returns ------- local_channel : n by 3 array of electric field vectors at each sink # """ if network_index.shape[1] == 2: for i in prange(network_index.shape[0]): local_channel[i, :], times[i] = EM.losChannelv2( scattering_points[network_index[i, 0] - 1, :], scattering_normals[network_index[i, 0] - 1, :], scattering_weights[network_index[i, 0] - 1, :], point_information[network_index[i, 0] - 1], scattering_points[network_index[i, 1] - 1, :], scattering_normals[network_index[i, 1] - 1, :], scattering_weights[network_index[i, 1] - 1, :], point_information[network_index[i, 1] - 1], wavelength, ) elif network_index.shape[1] == 3: for i in prange(network_index.shape[0]): local_channel[i, :], times[i] = EM.losplus1Channelv2( scattering_points[network_index[i, 0] - 1, :], scattering_normals[network_index[i, 0] - 1, :], scattering_weights[network_index[i, 0] - 1, :], point_information[network_index[i, 0] - 1], scattering_points[network_index[i, 1] - 1, :], scattering_normals[network_index[i, 1] - 1, :], scattering_weights[network_index[i, 1] - 1, :], point_information[network_index[i, 1] - 1], scattering_points[network_index[i, 2] - 1, :], scattering_normals[network_index[i, 2] - 1, :], scattering_weights[network_index[i, 2] - 1, :], point_information[network_index[i, 2] - 1], wavelength, ) elif network_index.shape[1] == 4: for i in prange(network_index.shape[0]): local_channel[i, :], times[i] = EM.losplus2Channelv2( scattering_points[network_index[i, 0] - 1, :], scattering_normals[network_index[i, 0] - 1, :], scattering_weights[network_index[i, 0] - 1, :], point_information[network_index[i, 0] - 1], scattering_points[network_index[i, 1] - 1, :], scattering_normals[network_index[i, 1] - 1, :], scattering_weights[network_index[i, 1] - 1, :], point_information[network_index[i, 1] - 1], scattering_points[network_index[i, 2] - 1, :], scattering_normals[network_index[i, 2] - 1, :], scattering_weights[network_index[i, 2] - 1, :], point_information[network_index[i, 2] - 1], scattering_points[network_index[i, 3] - 1, :], scattering_normals[network_index[i, 3] - 1, :], scattering_weights[network_index[i, 3] - 1, :], point_information[network_index[i, 3] - 1], wavelength, ) return local_channel, times @njit(parallel=True) def ScatteringNetworkGeneratorv2( network_index, scattering_points, scattering_normals, scattering_weights, point_information, wavelength, local_channel, ): """ for each row in the partial_network, calculate the distance travelled from the origin to the sink point # Parameters ---------- partial_network : n by (3*m) array of 
coordinates # Returns ------- local_channel : n by 3 array of electric field vectors at each sink # """ if network_index.shape[1] == 2: local_channel = EM.losChannel( scattering_points[network_index[0] - 1], scattering_normals[network_index[0] - 1], scattering_weights[network_index[0] - 1], point_information[network_index[0] - 1], scattering_points[network_index[1] - 1], scattering_normals[network_index[1] - 1], scattering_weights[network_index[1] - 1], point_information[network_index[1] - 1], wavelength, ) elif network_index.shape[1] == 3: local_channel = EM.losplus1Channel( scattering_points[network_index[0] - 1], scattering_normals[network_index[0] - 1], scattering_weights[network_index[0] - 1], point_information[network_index[0] - 1], scattering_points[network_index[1] - 1], scattering_normals[network_index[1] - 1], scattering_weights[network_index[1] - 1], point_information[network_index[1] - 1], scattering_points[network_index[2] - 1], scattering_normals[network_index[2] - 1], scattering_weights[network_index[2] - 1], point_information[network_index[2] - 1], wavelength, ) elif network_index.shape[1] == 4: local_channel = EM.losplus2Channel( scattering_points[network_index[0] - 1], scattering_normals[network_index[0] - 1], scattering_weights[network_index[0] - 1], point_information[network_index[0] - 1], scattering_points[network_index[1] - 1], scattering_normals[network_index[1] - 1], scattering_weights[network_index[1] - 1], point_information[network_index[1] - 1], scattering_points[network_index[2] - 1], scattering_normals[network_index[2] - 1], scattering_weights[network_index[2] - 1], point_information[network_index[2] - 1], scattering_points[network_index[3] - 1], scattering_normals[network_index[3] - 1], scattering_weights[network_index[3] - 1], point_information[network_index[3] - 1], wavelength, ) return local_channel def PoyntingVector(partial_network): """ for each row in the partial_network, calculate the distance travelled from the origin to the sink point # Parameters ---------- partial_network : n by 6 array of coordinates # Returns ------- pointing_vectors : n by 3 array of path lengths between each point. # """ pointing_vectors = np.zeros((partial_network.shape[0], 3), dtype=np.float32) pointing_mags = np.zeros((partial_network.shape[0], 1), dtype=np.float32) for i in range(partial_network.shape[0]): pointing_vectors[i, :] = partial_network[i, -3:] - partial_network[i, -6:-3] pointing_mags[i] = np.sqrt( pointing_vectors[i, 0] ** 2 + pointing_vectors[i, 1] ** 2 + pointing_vectors[i, 2] ** 2 ) pointing_vectors = pointing_vectors / pointing_mags return pointing_vectors def AngleofArrivalVectors(partial_network): """ calculate the vector from the receiving point to the incoming ray, to calculate angle of arrival correctly # Parameters ---------- partial_network : n by 6 array of coordinates # Returns ------- pointing_vectors : n by 3 array of path lengths between each point. 
# """ pointing_vectors = np.zeros((partial_network.shape[0], 3), dtype=np.float32) pointing_mags = np.zeros((partial_network.shape[0], 1), dtype=np.float32) for i in range(partial_network.shape[0]): pointing_vectors[i, :] = partial_network[i, -6:-3] - partial_network[i, -3:] pointing_mags[i] = np.sqrt( pointing_vectors[i, 0] ** 2 + pointing_vectors[i, 1] ** 2 + pointing_vectors[i, 2] ** 2 ) pointing_vectors = pointing_vectors / pointing_mags return pointing_vectors def CalculatePoyntingVectors( total_network, wavelength, scattering_index, ideal_vector=np.asarray([[1, 0, 0]]), az_bins=np.linspace(-np.pi, np.pi, 19), el_bins=np.linspace(-np.pi / 2.0, np.pi / 2.0, 19), time_bins=np.linspace(-1e-9, 1.9e-8, 20), impulse=False, aoa=True, ): """ Takes the total network generated using the raycasting process, and calculates the angle of arrival spectrum, delay spectrum, and angular standard deviation as a measure of `farfield ness' of the arriving waves' Parameters ---------- total_network : array of n*(m*3) tuples the coordinates of each interaction point wavelength : float wavelength of interest, currently a single value (SI units) scattering_index : n*2 array of integers the source and sink index of each ray ideal_vector : 1*3 array the direction vector of the `ideal' incoming ray time bins : 1d numpy array of floats default is 20 bins, but this should always be set by the user to a sampling rate sufficent for twice the expected highest frequency Returns ------- aoa_spectrum : array of size sinknum*len(theta_bins) angle of arrival spectrum impulse_response : travel_times : """ max_path_divergence = 2.0 if len(total_network) == 0: sourcenum = np.int(np.nanmax(scattering_index[:, 0])) + 1 sinknum = np.int(np.nanmax(scattering_index[:, 1])) + 1 aoa_spectrum = np.zeros((sinknum, az_bins.shape[0]), dtype=np.float32) delay_spectrum = np.zeros((sinknum, az_bins.shape[0]), dtype=np.float32) else: wave_vector = (2 * np.pi) / wavelength sourcenum = np.int(np.nanmax(scattering_index[:, 0])) + 1 sinknum = np.int(np.nanmax(scattering_index[:, 1])) + 1 scatterdepth = int((len(total_network[0, :]) - 3) / 3) impulse_response = np.zeros((len(time_bins), sinknum), dtype="complex") angle_response = np.zeros((len(az_bins), sinknum), dtype="complex") aoa_spectrum = np.zeros((sinknum, az_bins.shape[0]), dtype=np.float32) delay_spectrum = np.zeros((sinknum, az_bins.shape[0]), dtype=np.float32) az_std = np.zeros((sinknum), dtype=np.float32) averaged_response = np.zeros((sinknum, 1), dtype="complex") phase_std = np.zeros((sinknum), dtype=np.float32) total_pointing_vectors = np.empty((0, 3), dtype=np.float32) paths = np.empty((0, 1), dtype=np.float32).ravel() for idx in range(scatterdepth): if idx == (scatterdepth - 1): # indexing if scatterdepth == 1: start_row = 0 end_row = len(total_network[:, 1]) - 1 else: start_row = ( np.max( np.where((np.isnan(total_network[:, 6 + ((idx - 1) * 3)]))) ) + 1 ) end_row = len(total_network[:, 1]) - 1 end_col = (idx * 3) + 6 # engine here paths = np.append( paths, PathLength(total_network[start_row : end_row + 1, :end_col]), axis=0, ) total_pointing_vectors = np.append( total_pointing_vectors, AngleofArrivalVectors( total_network[start_row : end_row + 1, :end_col] ), axis=0, ) elif idx == 0: # indexing start_row = 0 end_row = np.max(np.where((np.isnan(total_network[:, 6 + (idx * 3)])))) end_col = (idx * 3) + 6 # engine here paths = np.append( paths, PathLength(total_network[start_row : end_row + 1, :end_col]), axis=0, ) total_pointing_vectors = np.append( total_pointing_vectors, 
AngleofArrivalVectors( total_network[start_row : end_row + 1, :end_col] ), axis=0, ) else: # indexing start_row = ( np.max(np.where((np.isnan(total_network[:, 3 + (idx * 3)])))) + 1 ) end_row = np.max( np.where((np.isnan(total_network[:, 6 + ((idx) * 3)]))) ) end_col = (idx * 3) + 6 # engine here paths = np.append( paths, PathLength(total_network[start_row : end_row + 1, :end_col]), axis=0, ) total_pointing_vectors = np.append( total_pointing_vectors, AngleofArrivalVectors( total_network[start_row : end_row + 1, :end_col] ), axis=0, ) # # scatter_map indexing path_loss = wavelength / (4 * np.pi * paths) ray_components = path_loss * (np.exp(paths * wave_vector * 1j)) phase_components = paths * wave_vector angular_dist = Angular_distance(ideal_vector, total_pointing_vectors) travel_times = paths / scipy.constants.c for sink_index in range(sinknum): sink_indexing = np.where((scattering_index[:, 1] == sink_index)) if impulse: impulse_response = ImpulseStack( impulse_response, ray_components, sink_index, sink_indexing, travel_times, time_bins, ) # impulse_bin_indexing=np.digitize(travel_times[sink_indexing],time_bins+travel_times[np.argmax(ray_components)],right=True) # for time_index in range(len(ray_components[sink_indexing])): # impulse_response[impulse_bin_indexing[time_index],sink_index]=impulse_response[impulse_bin_indexing[time_index],sink_index]+ray_components[sink_indexing][time_index] if aoa: angle_response = angleofarrivalStack( angle_response, ray_components, sink_index, sink_indexing, angular_dist, az_bins, ) # angle_bin_indexing=np.digitize(angular_dist[sink_indexing],az_bins,right=True) # for angle_index in range(len(ray_components[sink_indexing])): # angle_response[angle_bin_indexing[angle_index],sink_index]=angle_response[angle_bin_indexing[angle_index],sink_index]+ray_components[sink_indexing][angle_index] return angle_response, impulse_response, travel_times[np.argmax(ray_components)] @njit(parallel=True) def ImpulseStack( impulse_response, ray_components, sink_index, sink_indexing, travel_times, time_bins ): # attempt to impove the efficiency of the digitize action impulse_bin_indexing = np.digitize( travel_times[sink_indexing], time_bins + travel_times[np.argmax(ray_components)], right=True, ) for time_index in prange(len(ray_components[sink_indexing])): impulse_response[impulse_bin_indexing[time_index], sink_index] = ( impulse_response[impulse_bin_indexing[time_index], sink_index] + ray_components[sink_indexing][time_index] ) return impulse_response @njit(parallel=True) def angleofarrivalStack( angle_response, ray_components, sink_index, sink_indexing, angular_dist, az_bins ): # improved efficiency digitizatoin angle_bin_indexing = np.digitize(angular_dist[sink_indexing], az_bins, right=True) for angle_index in prange(len(ray_components[sink_indexing])): angle_response[angle_bin_indexing[angle_index], sink_index] = ( angle_response[angle_bin_indexing[angle_index], sink_index] + ray_components[sink_indexing][angle_index] ) return angle_response @njit(parallel=True) def Angular_distance(center_vector, pointing_vectors): if len(center_vector) == 1: angle_lists = np.zeros((len(pointing_vectors), 1), dtype=np.float32) for i in range(len(pointing_vectors)): angle_lists[i] = np.arccos(np.dot(center_vector, pointing_vectors[i, :])) # angle_lists[i]=np.arctan2(pointing_vectors[i,1],pointing_vectors[i,0]) else: angle_lists = np.zeros((len(pointing_vectors), 1), dtype=np.float32) for i in range(len(pointing_vectors)): angle_lists[i] = np.arccos( np.dot(center_vector[i, :], 
pointing_vectors[i, :]) ) # angle_lists[i]=np.arctan2(pointing_vectors[i,1],pointing_vectors[i,0]) return angle_lists @jit(nopython=True) def weighted_avg_and_std(values, weights): """ Return the weighted average and standard deviation Parameters ---------- values : TYPE DESCRIPTION. weights : TYPE DESCRIPTION. Returns ------- None. """ average = np.average(values, weights=weights) # Fast and numerically precise: variance = np.average((values - average) ** 2, weights=weights) return (average, math.sqrt(variance)) # @jit def VectorNetworkProcessEM( sourcenum, sinknum, unified_points, unified_normals, unified_weights, point_information, scattering_index, scattering_coefficient, wavelength, ): """ A more efficienct version of BaseNetworkProcess, using the vectorised path length function to process the total_network array more efficienctly. # Parameters ---------- total_network : array of n*(m*3) tuples, the coordinates of each interaction point wavelength : scaler wavelength of interest, currently a single value (SI units) scattering_index : n*2 array of integers, the source and sink index of each ray # Returns ------- scatter_map : source_size*sink_size array # """ if scattering_index.shape[0] == 0: scatter_map = np.zeros((sourcenum, sinknum, 3), dtype="complex") else: wave_vector = (2 * np.pi) / wavelength scatterdepth = scattering_index.shape[1] - 1 scatter_map = np.zeros((sourcenum, sinknum, 3, scatterdepth), dtype="complex") for idx in range(scatterdepth): if idx == (scatterdepth - 1): ray_analysis_index = scattering_index[ ~np.equal(scattering_index[:, idx + 1], 0), : ] ray_components = np.zeros( (ray_analysis_index.shape[0], 3), dtype=np.complex64 ) ray_components = ScatteringNetworkGenerator( ray_analysis_index, unified_points, unified_normals, unified_weights, point_information, scattering_coefficient, wavelength, ray_components, ) # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) depth_slice = ray_analysis_index[:, [0, -1]] elif idx == 0: ray_analysis_index = scattering_index[ np.equal(scattering_index[:, idx + 2], 0), 0 : idx + 2 ] ray_components = np.zeros( (ray_analysis_index.shape[0], 3), dtype=np.complex64 ) ray_components = ScatteringNetworkGenerator( ray_analysis_index, unified_points, unified_normals, unified_weights, point_information, scattering_coefficient, wavelength, ray_components, ) # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) depth_slice = ray_analysis_index else: ray_analysis_index = scattering_index[ ~np.equal(scattering_index[:, idx + 1], 0), 0 : idx + 2 ] ray_components = np.zeros( (ray_analysis_index.shape[0], 3), dtype=np.complex64 ) ray_components = ScatteringNetworkGenerator( ray_analysis_index, unified_points, unified_normals, unified_weights, point_information, scattering_coefficient, wavelength, ray_components, ) # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) depth_slice = ray_analysis_index[:, [0, -1]] # # scatter_map indexing # for source_index in range(sourcenum): # for sink_index in range(sinknum): # scatter_map[source_index,sink_index,idx]=np.sum(ray_components[(depth_slice[:,0]-1==0)&(depth_slice[:,1]-sourcenum-1==0)]) scatter_map = scatter_net_sortEMtest( sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx ) return scatter_map def VectorNetworkProcessTimeEM( sourcenum, sinknum, unified_points, unified_normals, unified_weights, point_information, scattering_index, wavelength, ): """ A more efficienct version of BaseNetworkProcess, using the 
vectorised path length function to process the total_network array more efficienctly. # Parameters ---------- total_network : array of n*(m*3) tuples, the coordinates of each interaction point wavelength : scaler wavelength of interest, currently a single value (SI units) scattering_index : n*2 array of integers, the source and sink index of each ray # Returns ------- scatter_map : source_size*sink_size array # """ if scattering_index.shape[0] == 0: scatter_map = np.zeros((sourcenum, sinknum, 3), dtype="complex") else: time_index = np.linspace(1e-8, 2e-6, 100000) wave_vector = (2 * np.pi) / wavelength scatterdepth = scattering_index.shape[1] - 1 scatter_map = np.zeros( (sourcenum, sinknum, 3, len(time_index), scatterdepth), dtype="complex" ) for idx in range(scatterdepth): if idx == (scatterdepth - 1): ray_analysis_index = scattering_index[ ~np.equal(scattering_index[:, idx + 1], 0), : ] ray_components = np.zeros( (ray_analysis_index.shape[0], 3), dtype=np.complex64 ) time_components = np.zeros( (ray_analysis_index.shape[0]), dtype=np.float32 ) ray_components, time_components = ScatteringNetworkGeneratorTime( ray_analysis_index, unified_points, unified_normals, unified_weights, point_information, wavelength, ray_components, time_components, ) # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) depth_slice = ray_analysis_index[:, [0, -1]] elif idx == 0: ray_analysis_index = scattering_index[ np.equal(scattering_index[:, idx + 2], 0), 0 : idx + 2 ] ray_components = np.zeros( (ray_analysis_index.shape[0], 3), dtype=np.complex64 ) time_components = np.zeros( (ray_analysis_index.shape[0]), dtype=np.float32 ) ray_components, time_components = ScatteringNetworkGeneratorTime( ray_analysis_index, unified_points, unified_normals, unified_weights, point_information, wavelength, ray_components, time_components, ) # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) depth_slice = ray_analysis_index else: ray_analysis_index = scattering_index[ ~np.equal(scattering_index[:, idx + 1], 0), 0 : idx + 2 ] ray_components = np.zeros( (ray_analysis_index.shape[0], 3), dtype=np.complex64 ) time_components = np.zeros( (ray_analysis_index.shape[0]), dtype=np.float32 ) ray_components, time_components = ScatteringNetworkGeneratorTime( ray_analysis_index, unified_points, unified_normals, unified_weights, point_information, wavelength, ray_components, time_components, ) # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) depth_slice = ray_analysis_index[:, [0, -1]] # # scatter_map indexing # for source_index in range(sourcenum): # for sink_index in range(sinknum): # scatter_map[source_index,sink_index,idx]=np.sum(ray_components[(depth_slice[:,0]-1==0)&(depth_slice[:,1]-sourcenum-1==0)]) scatter_map = scatter_net_sortTimeEM( sourcenum, sinknum, scatter_map, depth_slice, ray_components, time_components, time_index, idx, ) return scatter_map def VectorNetworkProcessEMv2( sourcenum, sinknum, unified_points, unified_normals, unified_weights, point_information, scattering_index, wavelength, ): """ A more efficienct version of BaseNetworkProcess, using the vectorised path length function to process the total_network array more efficienctly. 
# Parameters ---------- total_network : array of n*(m*3) tuples, the coordinates of each interaction point wavelength : scaler wavelength of interest, currently a single value (SI units) scattering_index : n*2 array of integers, the source and sink index of each ray # Returns ------- scatter_map : source_size*sink_size array # """ if scattering_index.shape[0] == 0: scatter_map = np.zeros((sourcenum, sinknum, 3), dtype="complex") else: wave_vector = (2 * np.pi) / wavelength scatterdepth = scattering_index.shape[1] - 1 scatter_map = np.zeros((sourcenum, sinknum, 3, scatterdepth), dtype="complex") ray_components = np.zeros((scattering_index.shape[0], 3), dtype=np.complex64) ray_components = EM.EMGPUWrapper( scattering_index, point_information, wavelength ) for idx in range(scatterdepth): if idx == (scatterdepth - 1): ray_analysis_index = scattering_index[ ~np.equal(scattering_index[:, idx + 1], 0), : ] temp_components = ray_components[ ~np.equal(scattering_index[:, idx + 1], 0), : ] # ray_components=ScatteringNetworkGenerator(ray_analysis_index,unified_points,unified_normals,unified_weights,point_information,wavelength,ray_components) # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) depth_slice = ray_analysis_index[:, [0, -1]] elif idx == 0: ray_analysis_index = scattering_index[ np.equal(scattering_index[:, idx + 2], 0), 0 : idx + 2 ] temp_components = ray_components[ ~np.equal(scattering_index[:, idx + 2], 0), : ] # ray_components=np.zeros((ray_analysis_index.shape[0],3),dtype=np.complex64) # ray_components=EM.EMGPUWrapper(ray_analysis_index,point_information,wavelength) # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) depth_slice = ray_analysis_index else: ray_analysis_index = scattering_index[ ~np.equal(scattering_index[:, idx + 1], 0), 0 : idx + 2 ] temp_components = ray_components[ ~np.equal(scattering_index[:, idx + 1], 0), : ] # ray_components=EM.EMGPUWrapper(ray_analysis_index,point_information,wavelength) # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) depth_slice = ray_analysis_index[:, [0, -1]] # # scatter_map indexing # for source_index in range(sourcenum): # for sink_index in range(sinknum): # scatter_map[source_index,sink_index,idx]=np.sum(ray_components[(depth_slice[:,0]-1==0)&(depth_slice[:,1]-sourcenum-1==0)]) scatter_map = scatter_net_sortEM( sourcenum, sinknum, scatter_map, depth_slice, temp_components, idx ) return scatter_map # @jit def VectorNetworkProcessv2( sources, sinks, scattering_points, scattering_index, wavelength ): """ A more efficienct version of BaseNetworkProcess, using the vectorised path length function to process the total_network array more efficienctly. 
# Parameters ---------- total_network : array of n*(m*3) tuples, the coordinates of each interaction point wavelength : scaler wavelength of interest, currently a single value (SI units) scattering_index : n*2 array of integers, the source and sink index of each ray # Returns ------- scatter_map : source_size*sink_size array # """ if scattering_index.shape[0] == 0: sourcenum = np.int(np.nanmax(scattering_index[:, 0])) + 1 sinknum = np.int(np.nanmax(scattering_index[:, 1])) + 1 scatter_map = np.zeros((sourcenum, sinknum, 1), dtype="complex") else: unified_model = np.append( np.append(sources.astype(np.float32), sinks.astype(np.float32), axis=0), scattering_points.astype(np.float32), axis=0, ) wave_vector = (2 * np.pi) / wavelength sourcenum = sources.shape[0] sinknum = sinks.shape[0] scatterdepth = scattering_index.shape[1] - 1 scatter_map = np.zeros((sourcenum, sinknum, scatterdepth), dtype="complex") for idx in range(scatterdepth): if idx == (scatterdepth - 1): ray_analysis_index = scattering_index[ ~np.equal(scattering_index[:, idx + 1], 0), : ] if idx == 0: paths = PathLength( np.append( unified_model[ray_analysis_index[:, 0], :] - 1, unified_model[ray_analysis_index[:, 1] - 1, :], axis=1, ) ) elif idx == 1: paths = PathLength( np.append( np.append( unified_model[ray_analysis_index[:, 0] - 1, :], unified_model[ray_analysis_index[:, 1] - 1, :], axis=1, ), unified_model[ray_analysis_index[:, 2] - 1, :], axis=1, ) ) elif idx == 2: paths = PathLength( np.append( np.append( np.append( unified_model[ray_analysis_index[:, 0] - 1, :], unified_model[ray_analysis_index[:, 1] - 1, :], axis=1, ), unified_model[ray_analysis_index[:, 2] - 1, :], axis=1, ), unified_model[ray_analysis_index[:, 3] - 1, :], axis=1, ) ) # path_loss=((wavelength/(4*np.pi*paths))**2)*np.exp(paths*wave_vector*1j) path_loss = wavelength / (4 * np.pi * paths) ray_components = path_loss * (np.exp(paths * wave_vector * 1j)) # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) depth_slice = ray_analysis_index[:, [0, -1]] elif idx == 0: ray_analysis_index = scattering_index[ np.equal(scattering_index[:, idx + 2], 0), 0 : idx + 2 ] paths = PathLength( np.append( unified_model[ray_analysis_index[:, 0] - 1, :], unified_model[ray_analysis_index[:, 1] - 1, :], axis=1, ) ) # path_loss=((wavelength/(4*np.pi*paths))**2)*np.exp(paths*wave_vector*1j) path_loss = wavelength / (4 * np.pi * paths) ray_components = path_loss * (np.exp(paths * wave_vector * 1j)) # path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) depth_slice = ray_analysis_index else: ray_analysis_index = scattering_index[ ~np.equal(scattering_index[:, idx + 1], 0), : ] if idx == 0: paths = PathLength( np.append( unified_model[ray_analysis_index[:, 0], :] - 1, unified_model[ray_analysis_index[:, 1] - 1, :], axis=1, ) ) elif idx == 1: paths = PathLength( np.append( np.append( unified_model[ray_analysis_index[:, 0] - 1, :], unified_model[ray_analysis_index[:, 1] - 1, :], axis=1, ), unified_model[ray_analysis_index[:, 2] - 1, :], axis=1, ) ) elif idx == 2: paths = PathLength( np.append( np.append( np.append( unified_model[ray_analysis_index[:, 0] - 1, :], unified_model[ray_analysis_index[:, 1] - 1, :], axis=1, ), unified_model[ray_analysis_index[:, 2] - 1, :], axis=1, ), unified_model[ray_analysis_index[:, 3] - 1, :], axis=1, ) ) # path_loss=((wavelength/(4*np.pi*paths))**2)*np.exp(paths*wave_vector*1j) path_loss = wavelength / (4 * np.pi * paths) ray_components = path_loss * (np.exp(paths * wave_vector * 1j)) # 
path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths) depth_slice = ray_analysis_index[:, [0, -1]] # # scatter_map indexing # for source_index in range(sourcenum): # for sink_index in range(sinknum): # scatter_map[source_index,sink_index,idx]=np.sum(ray_components[(depth_slice[:,0]-1==0)&(depth_slice[:,1]-sourcenum-1==0)]) scatter_map = scatter_net_sorttest( sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx ) return scatter_map @njit(parallel=True) def scatter_net_sort(sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx): # simplified scattering sort to create a scattering network using Numbas prange and loop unrolling. for sink_index in range(sinknum): for source_index in range(sourcenum): scatter_map[source_index, sink_index, idx] = np.nansum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index) ] ) return scatter_map def scatter_net_sorttest( sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx ): # simplified scattering sort to create a scattering network using Numbas prange and loop unrolling. for sink_index in range(sinknum): for source_index in range(sourcenum): scatter_map[source_index, sink_index, idx] = np.nansum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index) ] ) return scatter_map def scatter_net_sortEMtest( sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx ): # simplified scattering sort to create a scattering network using Numbas prange and loop unrolling. for sink_index in range(sinknum): for source_index in range(sourcenum): scatter_map[source_index, sink_index, 0, idx] = np.nansum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 0, ] ) scatter_map[source_index, sink_index, 1, idx] = np.nansum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 1, ] ) scatter_map[source_index, sink_index, 2, idx] = np.nansum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 2, ] ) return scatter_map @njit(parallel=True) def scatter_net_sortEM( sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx ): # simplified scattering sort to create a scattering network using Numbas prange and loop unrolling. for sink_index in range(sinknum): for source_index in range(sourcenum): scatter_map[source_index, sink_index, 0, idx] = np.sum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 0, ] ) scatter_map[source_index, sink_index, 1, idx] = np.sum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 1, ] ) scatter_map[source_index, sink_index, 2, idx] = np.sum( ray_components[ (depth_slice[:, 0] - 1 == source_index) & (depth_slice[:, 1] - sourcenum - 1 == sink_index), 2, ] ) return scatter_map # @njit(parallel=True) def scatter_net_sortTimeEM( sourcenum, sinknum, scatter_map, depth_slice, ray_components, time_components, time_index, idx, ): # simplified scattering sort to create a scattering network using Numbas prange and loop unrolling. 
# @njit(parallel=True)
def scatter_net_sortTimeEM(
    sourcenum,
    sinknum,
    scatter_map,
    depth_slice,
    ray_components,
    time_components,
    time_index,
    idx,
):
    # simplified scattering sort for three component ray data, binned into
    # time-of-arrival slots with np.digitize
    for sink_index in range(sinknum):
        for source_index in range(sourcenum):
            time_address = np.digitize(
                time_components[
                    (depth_slice[:, 0] - 1 == source_index)
                    & (depth_slice[:, 1] - sourcenum - 1 == sink_index)
                ],
                time_index,
            )
            scatter_map[source_index, sink_index, 0, time_address, idx] = np.sum(
                ray_components[
                    (depth_slice[:, 0] - 1 == source_index)
                    & (depth_slice[:, 1] - sourcenum - 1 == sink_index),
                    0,
                ]
            )
            scatter_map[source_index, sink_index, 1, time_address, idx] = np.sum(
                ray_components[
                    (depth_slice[:, 0] - 1 == source_index)
                    & (depth_slice[:, 1] - sourcenum - 1 == sink_index),
                    1,
                ]
            )
            scatter_map[source_index, sink_index, 2, time_address, idx] = np.sum(
                ray_components[
                    (depth_slice[:, 0] - 1 == source_index)
                    & (depth_slice[:, 1] - sourcenum - 1 == sink_index),
                    2,
                ]
            )
    return scatter_map


def BaseNetworkProcess(total_network, wavelength, scattering_index):
    # take the total_network matrix, work out the path length of each
    # connection, and hence the superposition at each sink
    wave_vector = (2 * np.pi) / wavelength
    sourcenum = int(np.nanmax(scattering_index[:, 0])) + 1
    sinknum = int(np.nanmax(scattering_index[:, 1])) + 1
    scatterdepth = int((len(total_network[1, :]) - 3) / 3)
    scatter_map = np.zeros((sourcenum, sinknum, scatterdepth), dtype="complex")
    depth_num = 0
    for idx in range(scatterdepth):
        if idx == (scatterdepth - 1):
            if scatterdepth == 1:
                start_row = 0
                end_row = len(total_network[:, 1]) - 1
            else:
                start_row = (
                    np.max(np.where((np.isnan(total_network[:, 6 + ((idx - 1) * 3)]))))
                    + 1
                )
                end_row = len(total_network[:, 1]) - 1
        elif idx == 0:
            start_row = 0
            end_row = np.max(np.where((np.isnan(total_network[:, 6 + (idx * 3)]))))
        else:
            start_row = (
                np.max(np.where((np.isnan(total_network[:, 3 + (idx * 3)])))) + 1
            )
            end_row = np.max(np.where((np.isnan(total_network[:, 6 + ((idx) * 3)]))))
        for idr in range(start_row, end_row + 1):
            source_index = int(scattering_index[idr, 0])
            sink_index = int(scattering_index[idr, 1])
            path_distance = np.zeros((1))
            if idx == 0:
                path_distance = (
                    distance.euclidean(
                        total_network[idr, np.asarray([0, 1, 2]) + ((0) * 3)],
                        total_network[idr, np.asarray([3, 4, 5]) + ((0) * 3)],
                    )
                    + path_distance
                )
            else:
                for idy in range(idx + 1):
                    path_distance = (
                        distance.euclidean(
                            total_network[idr, np.asarray([0, 1, 2]) + ((idy) * 3)],
                            total_network[idr, np.asarray([3, 4, 5]) + ((idy) * 3)],
                        )
                        + path_distance
                    )
            path_loss = (4 * np.pi * path_distance) / wavelength
            scatter_map[source_index, sink_index, idx] = (1 / path_loss) * np.exp(
                -path_distance * wave_vector * 1j
            ) + scatter_map[source_index, sink_index, idx]
    return scatter_map
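
# --- Illustrative sketch (not part of the library API) ---
# The per-path term accumulated by BaseNetworkProcess is an amplitude of
# 1 / ((4*pi*d)/lambda) with a phase of exp(-j*k*d) for total path length d
# and wave vector k = 2*pi/lambda. The numbers below are invented purely for
# illustration.
def _sketch_path_term():
    wavelength = 0.03  # for example, 10 GHz in free space
    wave_vector = (2 * np.pi) / wavelength
    path_distance = 1.5  # metres, a made-up total path length
    path_loss = (4 * np.pi * path_distance) / wavelength
    term = (1 / path_loss) * np.exp(-path_distance * wave_vector * 1j)
    return term  # complex contribution of one ray path to the scatter_map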
def charge_rays_environment1Dv2(sources, sinks, environment_points, point_indexing):
    """
    Generate Ray Payload from numpy arrays of sources, sinks, and environment
    points, in a 1 dimensional array to match the point_indexing demands

    Parameters
    ----------
    sources : numpy array of the xyz source coordinates
    sinks : numpy array of the xyz sink coordinates
    environment_points : numpy array of xyz points for scattering in environment
    point_indexing : (n*m) by 2 array, 0 has the source index, and 1 has the
        sink index, otherwise nans

    Returns
    -------
    temp_ray_payload : array of ray_t type
        ray payload to be sent to GPU

    """
    unified_model = np.append(
        np.append(sources, sinks, axis=0), environment_points, axis=0
    )
    temp_ray_payload = np.empty(point_indexing.shape[0], dtype=base_types.ray_t)
    local_sources = unified_model[point_indexing[:, 0] - 1, :]
    directions = np.zeros(
        (len(unified_model[point_indexing[:, 1] - 1, :]), 3), dtype=np.float32
    )
    norm_length = np.zeros(
        (len(unified_model[point_indexing[:, 1] - 1, :]), 1), dtype=np.float32
    )
    directions, norm_length = math_functions.calc_dv_norm(
        unified_model[point_indexing[:, -2] - 1, :],
        unified_model[point_indexing[:, -1] - 1, :],
        directions,
        norm_length,
    )
    # the ray origin is the penultimate point in each index chain, consistent
    # with the direction and distance computed above
    temp_ray_payload[:]["ox"] = unified_model[point_indexing[:, -2] - 1, 0]
    temp_ray_payload[:]["oy"] = unified_model[point_indexing[:, -2] - 1, 1]
    temp_ray_payload[:]["oz"] = unified_model[point_indexing[:, -2] - 1, 2]
    temp_ray_payload[:]["dx"] = directions[:, 0]
    temp_ray_payload[:]["dy"] = directions[:, 1]
    temp_ray_payload[:]["dz"] = directions[:, 2]
    # temp_ray_payload[:]['tx']=unified_model[point_indexing[:,1]-1,0]
    # temp_ray_payload[:]['ty']=unified_model[point_indexing[:,1]-1,1]
    # temp_ray_payload[:]['tz']=unified_model[point_indexing[:,1]-1,2]
    temp_ray_payload[:]["dist"] = norm_length[:, 0]
    temp_ray_payload[:]["intersect"] = False
    return temp_ray_payload
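
# --- Illustrative sketch (not part of the library API) ---
# Charging a trivial payload: one source, one sink, no scattering points. The
# coordinates are invented; the indexing is 1-based because 0 is reserved as
# the null index.
def _sketch_charge_rays():
    sources = np.array([[0.0, 0.0, 0.0]], dtype=np.float32)
    sinks = np.array([[0.0, 0.0, 1.0]], dtype=np.float32)
    scatter = np.empty((0, 3), dtype=np.float32)
    point_indexing = np.array([[1, 2]], dtype=np.int32)  # source 1 -> sink 2
    rays = charge_rays_environment1Dv2(sources, sinks, scatter, point_indexing)
    # rays[0]["dist"] == 1.0, direction (0, 0, 1), intersect == False
    return rays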
""" hitmap = ~ray_payload[:]["intersect"] final_index = point_indexing[ np.all(np.array([hitmap, np.isin(point_indexing[:, -1], sink_index)]), axis=0), :, ] filtered_index = point_indexing[ np.all(np.array([hitmap, ~np.isin(point_indexing[:, -1], sink_index)]), axis=0), :, ] return filtered_index, final_index def launchRaycaster1Dv2( sources, sinks, scattering_points, io_indexing, environment_local ): # cuda.select_device(0) start = timer() first_ray_payload = charge_rays_environment1Dv2( sources, sinks, scattering_points, io_indexing ) prep_dt = timer() - start raystart = timer() ray_num = len(first_ray_payload) tri_num = len(environment_local) max_tris = 2 ** 20 triangle_chunk = np.linspace( 0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32 ) chunk_size = 2 ** 10 threads_in_block = 1024 ray_chunks = np.linspace( 0, ray_num, math.ceil(ray_num / chunk_size) + 1, dtype=np.int32 ) # for idx in range(len(triangle_chunk)-1): d_environment = cuda.device_array(len(environment_local), dtype=base_types.triangle_t) cuda.to_device(environment_local, to=d_environment) for n in range(len(ray_chunks) - 1): chunk_payload = first_ray_payload[ray_chunks[n] : ray_chunks[n + 1]] chunk_ray_size = len(chunk_payload) d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=base_types.ray_t) cuda.to_device(chunk_payload, to=d_chunk_payload) # # ray_temp=np.empty((chunk_ray_size),dtype=np.bool) # ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect'] # distance_temp=np.empty((chunk_ray_size),dtype=np.float32) # distance_temp[:]=math.inf # dist_list=cuda.to_device(distance_temp) # d_ray_flag=cuda.to_device(ray_temp) # Here, we choose the granularity of the threading on our device. We want # to try to cover the entire workload of rays and targets with simulatenous threads, so we'll # choose a grid of (source_num/16. 
def launchRaycaster1Dv2(
    sources, sinks, scattering_points, io_indexing, environment_local
):
    # cuda.select_device(0)
    start = timer()
    first_ray_payload = charge_rays_environment1Dv2(
        sources, sinks, scattering_points, io_indexing
    )
    prep_dt = timer() - start
    raystart = timer()
    ray_num = len(first_ray_payload)
    tri_num = len(environment_local)
    max_tris = 2 ** 20
    triangle_chunk = np.linspace(
        0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32
    )
    chunk_size = 2 ** 10
    threads_in_block = 1024
    ray_chunks = np.linspace(
        0, ray_num, math.ceil(ray_num / chunk_size) + 1, dtype=np.int32
    )
    # for idx in range(len(triangle_chunk)-1):
    d_environment = cuda.device_array(
        len(environment_local), dtype=base_types.triangle_t
    )
    cuda.to_device(environment_local, to=d_environment)
    for n in range(len(ray_chunks) - 1):
        chunk_payload = first_ray_payload[ray_chunks[n] : ray_chunks[n + 1]]
        chunk_ray_size = len(chunk_payload)
        d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=base_types.ray_t)
        cuda.to_device(chunk_payload, to=d_chunk_payload)
        #
        # ray_temp=np.empty((chunk_ray_size),dtype=np.bool)
        # ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']
        # distance_temp=np.empty((chunk_ray_size),dtype=np.float32)
        # distance_temp[:]=math.inf
        # dist_list=cuda.to_device(distance_temp)
        # d_ray_flag=cuda.to_device(ray_temp)
        # Here we choose the granularity of the threading on our device. We
        # want to cover the whole chunk of rays with simultaneous threads, so
        # we launch a 1D grid of ceil(chunk_ray_size / threads_in_block)
        # blocks, each with up to threads_in_block threads
        grids = math.ceil(chunk_ray_size / threads_in_block)
        threads = min(ray_chunks[1] - ray_chunks[0], threads_in_block)
        # Execute the kernel
        # cuda.profile_start()
        # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
        kernel1Dv2[grids, threads](d_chunk_payload, d_environment)
        # cuda.profile_stop()
        # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
        first_ray_payload[
            ray_chunks[n] : ray_chunks[n + 1]
        ] = d_chunk_payload.copy_to_host()
        # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host()
        # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host()
    # cuda.close()
    kernel_dt = timer() - raystart
    start = timer()
    RAYS_CAST = ray_num
    sink_index = np.arange(
        sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0]
    ).reshape(sinks.shape[0], 1)
    filtered_index, final_index = rayHits1Dv2(
        first_ray_payload, io_indexing, sink_index
    )
    mem_dt = timer() - start
    # print("First Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(prep_dt,kernel_dt,mem_dt))
    return filtered_index, final_index, RAYS_CAST
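
# --- Illustrative sketch (not part of the library API) ---
# The 1D launch configuration used above, worked through with invented
# numbers: a chunk of 3000 rays with 1024 threads per block needs 3 blocks.
def _sketch_launch_config():
    chunk_ray_size = 3000  # made-up chunk size
    threads_in_block = 1024
    grids = math.ceil(chunk_ray_size / threads_in_block)  # -> 3 blocks
    total_threads = grids * threads_in_block  # -> 3072 threads, 72 idle
    return grids, total_threads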
def launchRaycaster1Dv3(
    sources, sinks, scattering_points, io_indexing, environment_local
):
    # cuda.select_device(0)
    start = timer()
    first_ray_payload = charge_rays_environment1Dv2(
        sources, sinks, scattering_points, io_indexing
    )
    prep_dt = timer() - start
    raystart = timer()
    ray_num = len(first_ray_payload)
    tri_num = len(environment_local)
    # print('Launch Raycaster Triangles ', len(environment_local))
    max_tris = 2 ** 20
    triangle_chunk = np.linspace(
        0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32
    )
    chunk_size = 2 ** 18
    threads_in_block = 256
    # ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32)
    # for idx in range(len(triangle_chunk)-1):
    d_environment = cuda.device_array(
        len(environment_local), dtype=base_types.triangle_t
    )
    cuda.to_device(environment_local, to=d_environment)
    d_chunk_payload = cuda.device_array([ray_num], dtype=base_types.ray_t)
    cuda.to_device(first_ray_payload, to=d_chunk_payload)
    #
    # ray_temp=np.empty((chunk_ray_size),dtype=np.bool)
    # ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']
    # distance_temp=np.empty((chunk_ray_size),dtype=np.float32)
    # distance_temp[:]=math.inf
    # dist_list=cuda.to_device(distance_temp)
    # d_ray_flag=cuda.to_device(ray_temp)
    # Here we choose the granularity of the threading on our device: a 1D grid
    # of ceil(ray_num / threads_in_block) blocks covering every ray with one
    # thread each
    grids = math.ceil(ray_num / threads_in_block)
    threads = threads_in_block
    # Execute the kernel
    # cuda.profile_start()
    # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
    kernel1Dv3[grids, threads](d_chunk_payload, d_environment)
    # cuda.profile_stop()
    # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
    first_ray_payload = d_chunk_payload.copy_to_host()
    # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host()
    # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host()
    # cuda.close()
    kernel_dt = timer() - raystart
    start = timer()
    RAYS_CAST = ray_num
    sink_index = np.arange(
        sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0]
    ).reshape(sinks.shape[0], 1)
    filtered_index, final_index = rayHits1Dv2(
        first_ray_payload, io_indexing, sink_index
    )
    mem_dt = timer() - start
    # deallocate memory on gpu
    # ctx = cuda.current_context()
    # deallocs = ctx.deallocations
    # deallocs.clear()
    # print("First Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(prep_dt,kernel_dt,mem_dt))
    return filtered_index, final_index, RAYS_CAST
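
# --- Illustrative sketch (not part of the library API, requires a CUDA GPU) ---
# Driving the raycaster for a single direct source-to-sink visibility check.
# environment_local is assumed to be an array of base_types.triangle_t
# prepared elsewhere; the coordinates are invented.
def _sketch_launch_raycaster(environment_local):
    sources = np.array([[0.0, 0.0, 1.0]], dtype=np.float32)
    sinks = np.array([[5.0, 0.0, 1.0]], dtype=np.float32)
    scattering_points = np.empty((0, 3), dtype=np.float32)
    io_indexing = np.array([[1, 2]], dtype=np.int32)
    filtered_index, final_index, rays_cast = launchRaycaster1Dv3(
        sources, sinks, scattering_points, io_indexing, environment_local
    )
    # final_index is non-empty only if the line of sight is unobstructed
    return final_index, rays_cast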
def chunkingRaycaster1Dv2(
    sources, sinks, scattering_points, filtered_index, environment_local, terminate_flag
):
    # cuda.select_device(0)
    start = timer()
    sink_index = np.arange(
        sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0]
    ).reshape(sinks.shape[0], 1)
    scattering_point_index = np.arange(
        np.max(sink_index) + 1, np.max(sink_index) + 1 + scattering_points.shape[0]
    ).reshape(scattering_points.shape[0], 1)
    if not terminate_flag:
        target_indexing = create_model_index(
            filtered_index, sink_index, scattering_point_index
        )
    else:
        target_indexing = create_model_index(
            filtered_index, sink_index, np.empty((0, 0), dtype=np.int32)
        )  # only target rays at sinks
    second_ray_payload = charge_rays_environment1Dv2(
        sources, sinks, scattering_points, target_indexing
    )
    prep_dt = timer() - start
    raystart = timer()
    ray_num = len(second_ray_payload)
    tri_num = len(environment_local)
    max_tris = 2 ** 18
    triangle_chunk = np.linspace(
        0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32
    )
    chunk_size = 2 ** 10
    threads_in_block = 1024
    ray_chunks = np.linspace(
        0, ray_num, math.ceil(ray_num / chunk_size) + 1, dtype=np.int32
    )
    # for idx in range(len(triangle_chunk)-1):
    d_environment = cuda.device_array(
        len(environment_local), dtype=base_types.triangle_t
    )
    cuda.to_device(environment_local, to=d_environment)
    for n in range(len(ray_chunks) - 1):
        chunk_payload = second_ray_payload[ray_chunks[n] : ray_chunks[n + 1]]
        chunk_ray_size = len(chunk_payload)
        # distance_temp=np.empty((chunk_ray_size),dtype=np.float32)
        # distance_temp[:]=math.inf
        # ray_temp=np.empty((chunk_ray_size),dtype=np.bool)
        # ray_temp[:]=False
        # dist_list=cuda.to_device(distance_temp)
        # d_ray_flag=cuda.to_device(ray_temp)
        # d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=ray_t)
        d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=base_types.ray_t)
        cuda.to_device(chunk_payload, to=d_chunk_payload)
        # Here we choose the granularity of the threading on our device: a 1D
        # grid of ceil(chunk_ray_size / threads_in_block) blocks covering the
        # whole chunk of rays
        grids = math.ceil(chunk_ray_size / threads_in_block)
        threads = min(ray_chunks[1] - ray_chunks[0], threads_in_block)
        # Execute the kernel
        # cuda.profile_start()
        kernel1Dv2[grids, threads](d_chunk_payload, d_environment)
        # cuda.profile_stop()
        # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
        # second_ray_payload[ray_chunks[n]:ray_chunks[n+1]]=d_chunk_payload.copy_to_host()
        # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]=d_chunk_payload.copy_to_host()
        second_ray_payload[
            ray_chunks[n] : ray_chunks[n + 1]
        ] = d_chunk_payload.copy_to_host()
    # cuda.close()
    kernel_dt = timer() - raystart
    start = timer()
    RAYS_CAST = ray_num
    filtered_index2, final_index2 = rayHits1Dv2(
        second_ray_payload, target_indexing, sink_index
    )
    mem_dt = timer() - start
    # print("Second Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(prep_dt,kernel_dt,mem_dt))
    return filtered_index2, final_index2, RAYS_CAST
def chunkingRaycaster1Dv3(
    sources, sinks, scattering_points, filtered_index, environment_local, terminate_flag
):
    start = timer()
    # the rays must fit in GPU memory, so establish the memory limits, here
    # budgeting half of the currently free device memory
    free_mem, total_mem = cuda.current_context().get_memory_info()
    max_mem = np.ceil(free_mem * 0.5).astype(np.int64)
    ray_limit = (
        np.floor(
            np.floor((max_mem - environment_local.nbytes) / base_types.ray_t.size) / 1e7
        )
        * 1e7
    ).astype(np.int64)
    sink_index = np.arange(
        sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0]
    ).reshape(sinks.shape[0], 1)
    scattering_point_index = np.arange(
        np.max(sink_index) + 1, np.max(sink_index) + 1 + scattering_points.shape[0]
    ).reshape(scattering_points.shape[0], 1)
    prep_dt = timer() - start
    raystart = timer()
    RAYS_CAST = 0
    if not terminate_flag:
        target_indexing = create_model_index(
            filtered_index, sink_index, scattering_point_index
        )
    else:
        if (filtered_index.shape[0] * sink_index.shape[0]) < ray_limit:
            target_indexing = create_model_index(
                filtered_index, sink_index, np.empty((0, 0), dtype=np.int32)
            )  # only target rays at sinks
        else:
            # too many rays for the memory budget established above, so split
            # the filtered index and process the sub arrays in batches
            sub_filtered_index = np.array_split(
                filtered_index,
                np.ceil(
                    filtered_index.shape[0] * sink_index.shape[0] / ray_limit
                ).astype(int),
            )
            chunknum = len(sub_filtered_index)
            filtered_index2 = np.empty((0, filtered_index.shape[1] + 1), dtype=np.int32)
            final_index2 = np.empty((0, filtered_index.shape[1] + 1), dtype=np.int32)
            # print('chunking total of ',target_indexing.shape[0],' rays in ',chunknum,' batches')
            for chunkindex in range(chunknum):
                sub_target = create_model_index(
                    sub_filtered_index[chunkindex],
                    sink_index,
                    np.empty((0, 0), dtype=np.int32),
                )
                # cycle the raycaster over the sub arrays
                threads_in_block = 256
                second_ray_payload = charge_rays_environment1Dv2(
                    sources, sinks, scattering_points, sub_target
                )
                # ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32)
                # for idx in range(len(triangle_chunk)-1):
                d_environment = cuda.device_array(
                    len(environment_local), dtype=base_types.triangle_t
                )
                cuda.to_device(environment_local, to=d_environment)
                d_chunk_payload = cuda.device_array(
                    [second_ray_payload.shape[0]], dtype=base_types.ray_t
                )
                cuda.to_device(second_ray_payload, to=d_chunk_payload)
                #
                # ray_temp=np.empty((chunk_ray_size),dtype=np.bool)
                # ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']
                # distance_temp=np.empty((chunk_ray_size),dtype=np.float32)
                # distance_temp[:]=math.inf
                # dist_list=cuda.to_device(distance_temp)
                # d_ray_flag=cuda.to_device(ray_temp)
                # Here we choose the granularity of the threading on our
                # device: a 1D grid covering every ray with one thread each
                grids = math.ceil(second_ray_payload.shape[0] / threads_in_block)
                threads = threads_in_block
                # Execute the kernel
                # cuda.profile_start()
                # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
                kernel1Dv3[grids, threads](d_chunk_payload, d_environment)
                # cuda.profile_stop()
                second_ray_payload = d_chunk_payload.copy_to_host()
                # cuda.close()
                kernel_dt = timer() - raystart
                start = timer()
                RAYS_CAST += second_ray_payload.shape[0]
                temp_filtered_index2, temp_final_index2 = rayHits1Dv2(
                    second_ray_payload, sub_target, sink_index
                )
                mem_dt = timer() - start
                filtered_index2 = np.append(
                    filtered_index2, temp_filtered_index2, axis=0
                )
                final_index2 = np.append(final_index2, temp_final_index2, axis=0)
            # deallocate memory on gpu
            # ctx = cuda.current_context()
            # ctx.reset()
            # the results for this branch are fully accumulated, so return here
            # rather than falling through to the generic path below, which would
            # reference the (unset) target_indexing
            return filtered_index2, final_index2, RAYS_CAST
    if target_indexing.shape[0] >= ray_limit:
        # need to split the array and process separately
        sub_target = np.array_split(
            target_indexing, np.ceil(target_indexing.shape[0] / ray_limit).astype(int)
        )
        chunknum = len(sub_target)
        filtered_index2 = np.empty((0, target_indexing.shape[1]), dtype=np.int32)
        final_index2 = np.empty((0, target_indexing.shape[1]), dtype=np.int32)
        # print('chunking total of ',target_indexing.shape[0],' rays in ',chunknum,' batches')
        for chunkindex in range(chunknum):
            # cycle the raycaster over the sub arrays
            threads_in_block = 256
            second_ray_payload = charge_rays_environment1Dv2(
                sources, sinks, scattering_points, sub_target[chunkindex]
            )
            # ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32)
            # for idx in range(len(triangle_chunk)-1):
            d_environment = cuda.device_array(
                len(environment_local), dtype=base_types.triangle_t
            )
            cuda.to_device(environment_local, to=d_environment)
            d_chunk_payload = cuda.device_array(
                [second_ray_payload.shape[0]], dtype=base_types.ray_t
            )
            cuda.to_device(second_ray_payload, to=d_chunk_payload)
            #
            # ray_temp=np.empty((chunk_ray_size),dtype=np.bool)
            # ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']
            # distance_temp=np.empty((chunk_ray_size),dtype=np.float32)
            # distance_temp[:]=math.inf
            # dist_list=cuda.to_device(distance_temp)
            # d_ray_flag=cuda.to_device(ray_temp)
            # Here we choose the granularity of the threading on our device:
            # a 1D grid covering every ray with one thread each
            grids = math.ceil(second_ray_payload.shape[0] / threads_in_block)
            threads = threads_in_block
            # Execute the kernel
            # cuda.profile_start()
            # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
            kernel1Dv3[grids, threads](d_chunk_payload, d_environment)
            # cuda.profile_stop()
            # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
            second_ray_payload = d_chunk_payload.copy_to_host()
            # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host()
            # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host()
            # cuda.close()
            kernel_dt = timer() - raystart
            start = timer()
            RAYS_CAST += second_ray_payload.shape[0]
            temp_filtered_index2, temp_final_index2 = rayHits1Dv2(
                second_ray_payload, sub_target[chunkindex], sink_index
            )
            mem_dt = timer() - start
            filtered_index2 = np.append(filtered_index2, temp_filtered_index2, axis=0)
            final_index2 = np.append(final_index2, temp_final_index2, axis=0)
            # deallocate memory on gpu
            # ctx = cuda.current_context()
            # ctx.reset()
    else:
        second_ray_payload = charge_rays_environment1Dv2(
            sources, sinks, scattering_points, target_indexing
        )
        # print('Chunking Raycaster Triangles ', len(environment_local))
        # max_tris=2**18
        # triangle_chunk=np.linspace(0,tri_num,math.ceil(tri_num/max_tris)+1,dtype=np.int32)
        # chunk_size=2**11
        threads_in_block = 256
        # ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32)
        # for idx in range(len(triangle_chunk)-1):
        d_environment = cuda.device_array(
            len(environment_local), dtype=base_types.triangle_t
        )
        cuda.to_device(environment_local, to=d_environment)
        d_chunk_payload = cuda.device_array(
            [second_ray_payload.shape[0]], dtype=base_types.ray_t
        )
        cuda.to_device(second_ray_payload, to=d_chunk_payload)
        #
        # ray_temp=np.empty((chunk_ray_size),dtype=np.bool)
        # ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']
        # distance_temp=np.empty((chunk_ray_size),dtype=np.float32)
        # distance_temp[:]=math.inf
        # dist_list=cuda.to_device(distance_temp)
        # d_ray_flag=cuda.to_device(ray_temp)
        # Here we choose the granularity of the threading on our device: a 1D
        # grid covering every ray with one thread each
        grids = math.ceil(second_ray_payload.shape[0] / threads_in_block)
        threads = threads_in_block
        # Execute the kernel
        # cuda.profile_start()
        # kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
        kernel1Dv3[grids, threads](d_chunk_payload, d_environment)
        # cuda.profile_stop()
        # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
        second_ray_payload = d_chunk_payload.copy_to_host()
        # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host()
        # first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host()
        # cuda.close()
        kernel_dt = timer() - raystart
        start = timer()
        RAYS_CAST += second_ray_payload.shape[0]
        filtered_index2, final_index2 = rayHits1Dv2(
            second_ray_payload, target_indexing, sink_index
        )
        mem_dt = timer() - start
    # deallocate memory on gpu
    # ctx = cuda.current_context()
    # deallocs = ctx.deallocations
    # deallocs.clear()
    # print("Second Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(prep_dt,kernel_dt,mem_dt))
    return filtered_index2, final_index2, RAYS_CAST
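
# --- Illustrative sketch (not part of the library API) ---
# The GPU memory budget used above, worked through with invented numbers: half
# the free memory, minus the environment mesh, divided by the size of one ray
# record, rounded down to a multiple of 1e7 rays.
def _sketch_ray_limit():
    free_mem = 8 * 1024 ** 3  # pretend 8 GiB are free on the device
    environment_nbytes = 50 * 1024 ** 2  # pretend a 50 MiB triangle mesh
    max_mem = np.ceil(free_mem * 0.5).astype(np.int64)
    ray_limit = (
        np.floor(
            np.floor((max_mem - environment_nbytes) / base_types.ray_t.size) / 1e7
        )
        * 1e7
    ).astype(np.int64)
    return ray_limit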
def charge_shadow_rays1D(sources, sinks, point_indexing, target_indexing):
    """
    Generate Ray Payload from numpy arrays of sources, sinks, and environment
    points, in a 1 dimensional array

    Parameters
    ----------
    sources : numpy array of the xyz source coordinates
    sinks : numpy array of the xyz sink coordinates
    point_indexing : (n*m) by 2 array, 0 has the source index, and 1 has the
        sink index, otherwise nans
    target_indexing : numpy array of the sink indices to target

    Returns
    -------
    temp_ray_payload : array of ray_t type
        ray payload to be sent to GPU
    origins : numpy array of the ray origin coordinates
    source_sink_index : (n*m) by 2 array of source and sink indices

    """
    idx = 0
    targets = sinks
    target_num = len(targets)
    source_num, origin_width = np.shape(sources)
    origins = np.zeros((source_num * target_num, origin_width), dtype=np.float32)
    # placeholder values only; fully overwritten below (np.full with NaN is
    # invalid for an integer dtype)
    source_sink_index = np.zeros((source_num * target_num, 2), dtype=np.int32)
    temp_ray_payload = np.empty([source_num * target_num], dtype=base_types.ray_t)
    temp_ray_payload, origins, source_sink_index = ray_charge_core_final_vector(
        temp_ray_payload,
        origins,
        source_sink_index,
        sources,
        targets,
        point_indexing,
        target_indexing,
    )
    return temp_ray_payload, origins, source_sink_index


def ray_charge_core_final_vector(
    temp_ray_payload,
    origins,
    source_sink_index,
    sources,
    targets,
    point_indexing,
    target_indexing,
):
    # using array allocation
    # idx=0
    target_num = len(targets)
    source_num, origin_width = np.shape(sources)
    directions = np.zeros((len(targets), 3), dtype=np.float32)
    local_sources = np.zeros((len(targets), origin_width), dtype=np.float32)
    norm_length = np.zeros((len(targets), 1), dtype=np.float32)
    source_chunking = np.linspace(
        0,
        len(temp_ray_payload),
        math.ceil(len(temp_ray_payload) / target_num) + 1,
        dtype=np.int32,
    )
    for n in range(len(source_chunking) - 1):
        local_sources[:, -origin_width:] = sources[n, :]
        directions, norm_length = math_functions.calc_dv_norm(
            local_sources[:, -3:], targets, directions, norm_length
        )
        temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
            "ox"
        ] = local_sources[:, -3]
        temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
            "oy"
        ] = local_sources[:, -2]
        temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
            "oz"
        ] = local_sources[:, -1]
        temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
            "dx"
        ] = directions[:, 0]
        temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
            "dy"
        ] = directions[:, 1]
        temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
            "dz"
        ] = directions[:, 2]
        # temp_ray_payload[source_chunking[n]:source_chunking[n+1]]['tx']=targets[:,0]
        # temp_ray_payload[source_chunking[n]:source_chunking[n+1]]['ty']=targets[:,1]
        # temp_ray_payload[source_chunking[n]:source_chunking[n+1]]['tz']=targets[:,2]
        temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
            "dist"
        ] = norm_length[:, 0]
        temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
            "intersect"
        ] = False
        origins[source_chunking[n] : source_chunking[n + 1], -origin_width:] = sources[
            n, :
        ]
        source_sink_index[
            source_chunking[n] : source_chunking[n + 1], 0
        ] = point_indexing[n, 0]
        source_sink_index[
            source_chunking[n] : source_chunking[n + 1], 1
        ] = target_indexing.ravel()
    return temp_ray_payload, origins, source_sink_index
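
# --- Illustrative sketch (not part of the library API) ---
# The chunking scheme above lays out rays source-by-source: with 3 sources and
# 4 targets the payload holds 12 rays, and source_chunking marks the block
# boundaries [0, 4, 8, 12]. The numbers are invented for illustration.
def _sketch_source_chunking():
    source_num, target_num = 3, 4
    total_rays = source_num * target_num
    source_chunking = np.linspace(
        0, total_rays, math.ceil(total_rays / target_num) + 1, dtype=np.int32
    )
    return source_chunking  # -> array([ 0,  4,  8, 12], dtype=int32)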
def shadowRaycaster(filtered_network, sinks, filtered_index, environment_local):
    start = timer()
    target_indexing = np.full((len(sinks), 1), np.nan, dtype=np.float32).ravel()
    target_indexing[:] = range(len(sinks))
    second_ray_payload, second_origin, io_index2 = charge_shadow_rays1D(
        filtered_network, sinks, filtered_index, target_indexing
    )
    prep_dt = timer() - start
    raystart = timer()
    ray_num = len(second_ray_payload)
    tri_num = len(environment_local)
    max_tris = 2 ** 16
    triangle_chunk = np.linspace(
        0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32
    )
    chunk_size = 2 ** 21
    threads_in_block = 1024
    ray_chunks = np.linspace(
        0, ray_num, math.ceil(ray_num / chunk_size) + 1, dtype=np.int32
    )
    # sync_stream=cuda.stream()
    for idx in range(len(triangle_chunk) - 1):
        d_environment = cuda.device_array(
            len(environment_local[triangle_chunk[idx] : triangle_chunk[idx + 1]]),
            dtype=base_types.triangle_t,
        )
        cuda.to_device(
            environment_local[triangle_chunk[idx] : triangle_chunk[idx + 1]],
            to=d_environment,
        )
        for n in range(len(ray_chunks) - 1):
            chunk_payload = second_ray_payload[ray_chunks[n] : ray_chunks[n + 1]]
            chunk_ray_size = len(chunk_payload)
            distance_temp = np.empty((chunk_ray_size), dtype=np.float32)
            distance_temp[:] = math.inf
            ray_temp = np.empty((chunk_ray_size), dtype=bool)
            ray_temp[:] = False
            dist_list = cuda.to_device(distance_temp)
            d_ray_flag = cuda.to_device(ray_temp)
            # d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=ray_t)
            d_chunk_payload = cuda.device_array(
                [chunk_ray_size], dtype=base_types.ray_t
            )
            cuda.to_device(chunk_payload, to=d_chunk_payload)
            # Here we choose the granularity of the threading on our device:
            # a 1D grid of ceil(chunk_ray_size / threads_in_block) blocks
            # covering the whole chunk of rays
            grids = math.ceil(chunk_ray_size / threads_in_block)
            threads = min(ray_chunks[1] - ray_chunks[0], threads_in_block)
            # Execute the kernel
            # cuda.profile_start()
            kernel1D[grids, threads](
                d_chunk_payload, d_environment, d_ray_flag, dist_list
            )
            # second_ray_payload[ray_chunks[n]:ray_chunks[n+1]]=d_chunk_payload.copy_to_host()
            second_ray_payload[ray_chunks[n] : ray_chunks[n + 1]][
                "intersect"
            ] = d_ray_flag.copy_to_host()
            # second_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=dist_list.copy_to_host()
            # cuda.profile_stop()
            # distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
    # sync_stream.synchronize()
    kernel_dt = timer() - raystart
    memstart = timer()
    RAYS_CAST = ray_num
    filtered_network2, final_network2, filtered_index2, final_index2 = rayHits1D(
        second_ray_payload, io_index2, scatter_inc=1, origins_local=second_origin
    )
    mem_dt = timer() - memstart
    print(
        "Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(
            prep_dt, kernel_dt, mem_dt
        )
    )
    return filtered_network2, final_network2, filtered_index2, final_index2, RAYS_CAST


def rayHits1D(
    ray_payload, point_indexing, scatter_inc=1, origins_local=None, initial_network=None
):
    """
    filtering process for each raycasting stage, separating rays which hit
    sinks from non-terminating rays.

    Parameters
    ----------
    ray_payload : array of ray_t type
        the ray payload returned from the raycasting kernel; this function
        relies on the ray_t record including the target fields 'tx', 'ty', 'tz'
    point_indexing : 2D numpy array
        source and sink indices for each ray, with nan for unset sinks
    scatter_inc : int, optional
        the scattering increment. The default is 1.
    origins_local : numpy array, optional
        the ray origin coordinates. The default is None.
    initial_network : TYPE, optional
        DESCRIPTION. The default is None.

    Returns
    -------
    filtered_network : 2D numpy array of float
        coordinates of the non-terminating ray paths
    final_network : 2D numpy array of float
        coordinates of the ray paths which terminated at a sink
    filtered_index : 2D numpy array
        index of the non-terminating rays
    final_index : 2D numpy array
        index of the rays which terminated at a sink

    """
    hitmap = ~ray_payload[:]["intersect"]
    filter_vector = np.ravel(
        np.nonzero(np.all(np.asarray([hitmap, np.isnan(point_indexing[:, 1])]), axis=0))
    )
    output_vector = np.ravel(
        np.nonzero(
            np.all(np.asarray([hitmap, ~np.isnan(point_indexing[:, 1])]), axis=0)
        )
    )
    #
    final_network = np.zeros(
        (np.max(np.shape(output_vector)), 3 + (scatter_inc * 3)), dtype=np.float32
    )
    final_network[:, 0:-3] = origins_local[output_vector, :]
    final_network[:, -3] = ray_payload[output_vector]["tx"]
    final_network[:, -2] = ray_payload[output_vector]["ty"]
    final_network[:, -1] = ray_payload[output_vector]["tz"]
    #
    filtered_network = np.zeros(
        (np.max(np.shape(filter_vector)), 3 + (scatter_inc * 3)), dtype=np.float32
    )
    filtered_network[:, 0:-3] = origins_local[filter_vector, :]
    filtered_network[:, -3] = ray_payload[filter_vector]["tx"]
    filtered_network[:, -2] = ray_payload[filter_vector]["ty"]
    filtered_network[:, -1] = ray_payload[filter_vector]["tz"]
    #
    # final_network=hit_network[output_vector,:]
    # filtered_network=np.delete(hit_network,output_vector,axis=0)
    final_index = point_indexing[output_vector, :]
    filtered_index = point_indexing[filter_vector, :]
    return filtered_network, final_network, filtered_index, final_index


def rotate_view(vis):
    ctr = vis.get_view_control()
    ctr.rotate(2.0, 0.0)
    return False


# @njit
def create_model_index(source_index, sink_index, scattering_point_index):
    """
    Create an index for the model environment, first sources, then sinks, then
    the scattering points, in an uninterrupted chain

    Parameters
    ----------
    source_index : 2D numpy array of int
        the index of each source, or each partial ray path to extend
    sink_index : 2D numpy array of int
        the model index of each sink
    scattering_point_index : 2D numpy array of int
        the model index of each scattering point

    Returns
    -------
    io_indexing : 2D numpy array of int
        the ray index, with one extra column holding the new target of each ray
    """
""" io_indexing = np.zeros( ( source_index.shape[0] * (sink_index.shape[0] + scattering_point_index.shape[0]), source_index.shape[1] + 1, ), dtype=np.int32, ) idx = 0 for n in np.arange(0, source_index.shape[0]): if scattering_point_index.shape[0] == 0: io_indexing[ np.arange( idx, idx + (sink_index.shape[0] + scattering_point_index.shape[0]) ), -1, ] = sink_index.ravel() else: io_indexing[ np.arange( idx, idx + (sink_index.shape[0] + scattering_point_index.shape[0]) ), -1, ] = np.append(sink_index, scattering_point_index, axis=0).ravel() io_indexing[ np.arange( idx, idx + (sink_index.shape[0] + scattering_point_index.shape[0]) ), 0 : source_index.shape[1], ] = source_index[n, :] idx += sink_index.shape[0] + scattering_point_index.shape[0] # then boolean check and filter to ensure there are no loops, aiming next ray at originating point return io_indexing[np.not_equal(io_indexing[:, -2], io_indexing[:, -1]), :]
def workchunkingv2(
    sources, sinks, scattering_points, environment, max_scatter, line_of_sight=True
):
    """
    Raycasting index creation and assignment to the raycaster. The upper bound
    is around 4.7e8 rays at a time; chunking is already applied to prevent
    overflow of the GPU memory and timeouts.

    Parameters
    ----------
    sources : n*3 numpy array of float
    sinks : m*3 numpy array of float
    scattering_points : o*3 numpy array of float
    environment : numpy array of triangle_t
    max_scatter : int
    line_of_sight : boolean

    Returns
    -------
    full_index : 2D numpy array of ints
        the index for all successful rays cast from source coordinates, to any
        scattering points, to the sink point for each entry
    RAYS_CAST : int
        the number of rays cast in this launch.
    """
    # temp function to chunk the number of rays to prevent creation of ray
    # arrays too large for memory
    # print('WorkChunking Triangles ',len(environment))
    RAYS_CAST = 0
    raycasting_timestamp = timer()
    ray_estimate = (
        sources.shape[0] * (sinks.shape[0] + scattering_points.shape[0])
    ) + (
        sources.shape[0] * (scattering_points.shape[0] * sinks.shape[0] * (max_scatter))
    )
    # print("Total of {:3.1f} rays required".format(ray_estimate))
    # establish memory limits, aiming for no more than 80% utilisation
    free_mem, total_mem = cuda.current_context().get_memory_info()
    max_mem = np.ceil(free_mem * 0.8).astype(np.int64)
    ray_limit = (
        np.floor(np.floor((max_mem - environment.nbytes) / base_types.ray_t.size) / 1e7)
        * 1e7
    ).astype(np.int64)
    # establish index boundaries
    source_index = np.arange(1, sources.shape[0] + 1).reshape(
        sources.shape[0], 1
    )  # index starts at 1, leaving 0 as a null
    sink_index = np.arange(
        sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0]
    ).reshape(sinks.shape[0], 1)
    scattering_point_index = np.arange(
        np.max(sink_index) + 1, np.max(sink_index) + 1 + scattering_points.shape[0]
    ).reshape(scattering_points.shape[0], 1)
    # implement chunking
    if max_scatter == 1:
        full_index = np.empty((0, 2), dtype=np.int32)
        io_indexing = create_model_index(
            source_index, sink_index, scattering_point_index
        )
        if io_indexing.shape[0] >= ray_limit:
            # need to split the array and process separately
            sub_io = np.array_split(
                io_indexing, np.ceil(io_indexing.shape[0] / ray_limit).astype(int)
            )
            chunknum = len(sub_io)
            for chunkindex in range(chunknum):
                _, temp_index, first_wave_Rays = launchRaycaster1Dv3(
                    sources,
                    sinks,
                    scattering_points,
                    sub_io[chunkindex],
                    copy.deepcopy(environment),
                )
                full_index = np.append(full_index, temp_index, axis=0)
                RAYS_CAST += first_wave_Rays
        else:
            _, temp_index, first_wave_Rays = launchRaycaster1Dv3(
                sources,
                sinks,
                scattering_points,
                io_indexing,
                copy.deepcopy(environment),
            )
            full_index = np.append(full_index, temp_index, axis=0)
            RAYS_CAST = first_wave_Rays
    elif max_scatter == 2:
        full_index = np.empty((0, 3), dtype=np.int32)
        if line_of_sight:
            io_indexing = create_model_index(
                source_index, sink_index, scattering_point_index
            )
        else:
            # drop line of sight rays from queue
            io_indexing = create_model_index(
                source_index, np.empty((0, 1)), scattering_point_index
            )
        if io_indexing.shape[0] >= ray_limit:
            # need to split the array and process separately
            sub_io = np.array_split(
                io_indexing, np.ceil(io_indexing.shape[0] / ray_limit).astype(int)
            )
            chunknum = len(sub_io)
            final_index = np.empty((0, 3), dtype=np.int32)
            RAYS_CAST = 0
            for chunkindex in range(chunknum):
                (
                    temp_filtered_index,
                    temp_final_index,
                    first_wave_Rays,
                ) = launchRaycaster1Dv3(
                    sources,
                    sinks,
                    scattering_points,
                    sub_io[chunkindex],
                    copy.deepcopy(environment),
                )
                # filtered_index = np.append(filtered_index, temp_filtered_index, axis=0)
                final_index = np.append(final_index, temp_final_index, axis=0)
                if temp_filtered_index.shape[0] == 0:
                    temp_filtered_index2 = np.empty((0, 3), dtype=np.int32)
                    temp_final_index2 = np.empty((0, 3), dtype=np.int32)
                    second_wave_rays = 0
                else:
                    (
                        temp_filtered_index2,
                        temp_final_index2,
                        second_wave_rays,
                    ) = chunkingRaycaster1Dv3(
                        sources,
                        sinks,
                        scattering_points,
                        temp_filtered_index,
                        environment,
                        terminate_flag=True,
                    )
                RAYS_CAST += first_wave_Rays + second_wave_rays
                # create combined path network
                host_size = np.shape(temp_final_index)
                host_padding = np.zeros((host_size[0], 1), dtype=np.int32)
                temp_full_index = np.append(
                    np.append(temp_final_index, host_padding, axis=1),
                    temp_final_index2,
                    axis=0,
                )
                full_index = np.append(full_index, temp_full_index, axis=0)
        else:
            filtered_index, final_index, first_wave_Rays = launchRaycaster1Dv3(
                sources,
                sinks,
                scattering_points,
                io_indexing,
                copy.deepcopy(environment),
            )
            if filtered_index.shape[0] == 0:
                filtered_index2 = np.empty((0, 3), dtype=np.int32)
                final_index2 = np.empty((0, 3), dtype=np.int32)
                second_wave_rays = 0
            else:
                filtered_index2, final_index2, second_wave_rays = chunkingRaycaster1Dv3(
                    sources,
                    sinks,
                    scattering_points,
                    filtered_index,
                    environment,
                    terminate_flag=True,
                )
            # cuda.profile_stop()
            # print('WorkChunkingMaxScatter2 Triangles ', len(environment), 'Filtered Rays Stage 2', len(filtered_index2), 'Final Rays Stage 2',len(final_index2))
            start = timer()
            # filtered_network2,final_network2,filtered_index2,final_index2=rayHits(second_ray_payload,distmap2,io_index2,scatter_inc=2,origins_local=second_origin)
            RAYS_CAST = first_wave_Rays + second_wave_rays
            # create combined path network
            host_size = np.shape(final_index)
            host_padding = np.zeros((host_size[0], 1), dtype=np.int32)
            temp_full_index = np.append(
                np.append(final_index, host_padding, axis=1), final_index2, axis=0
            )
            full_index = np.append(full_index, temp_full_index, axis=0)
    elif max_scatter == 3:
        full_index = np.empty((0, 4), dtype=np.int32)
        chunknum = np.minimum(sources.shape[0], int(np.ceil(ray_estimate / 2e8)))
        chunknum = np.maximum(2, chunknum)
        source_chunking = np.linspace(0, sources.shape[0], chunknum, dtype=np.int32)
        for chunkindex in range(source_chunking.size - 1):
            if line_of_sight:
                io_indexing = create_model_index(
                    source_index[
                        source_chunking[chunkindex] : source_chunking[chunkindex + 1]
                    ],
                    sink_index,
                    scattering_point_index,
                )
                filtered_index, final_index, first_wave_Rays = launchRaycaster1Dv3(
                    sources, sinks, scattering_points, io_indexing, environment
                )
            else:
                # drop line of sight rays from queue
                io_indexing = create_model_index(
                    source_index[
                        source_chunking[chunkindex] : source_chunking[chunkindex + 1]
                    ],
                    np.empty((0, 1)),
                    scattering_point_index,
                )
                filtered_index, final_index, first_wave_Rays = launchRaycaster1Dv3(
                    sources, sinks, scattering_points, io_indexing, environment
                )
            filtered_index2, final_index2, second_wave_rays = chunkingRaycaster1Dv3(
                sources, sinks, scattering_points, filtered_index, environment, False
            )
            if filtered_index2.shape[0] * sinks.shape[0] > 2e8:
                temp_chunks = int(
                    np.ceil((filtered_index2.shape[0] * sinks.shape[0]) / 2e8)
                )
                temp_chunking = np.linspace(
                    0, filtered_index2.shape[0], temp_chunks, dtype=np.int32
                )
                temp_index = np.empty((0, 4), dtype=np.int32)
                for temp_chunkindex in range(temp_chunking.shape[0] - 1):
                    (
                        filtered_index3,
                        final_index3_part,
                        third_wave_rays,
                    ) = chunkingRaycaster1Dv3(
                        sources,
                        sinks,
                        scattering_points,
                        filtered_index2[
                            temp_chunking[temp_chunkindex] : temp_chunking[
                                temp_chunkindex + 1
                            ],
                            :,
                        ],
                        environment,
                        True,
                    )
                    temp_index = np.append(temp_index, final_index3_part, axis=0)
                final_index3 = temp_index
            else:
                filtered_index3, final_index3, third_wave_rays = chunkingRaycaster1Dv3(
                    sources,
                    sinks,
                    scattering_points,
                    filtered_index2,
                    environment,
                    True,
                )
            # cuda.profile_stop()
            start = timer()
            # filtered_network2,final_network2,filtered_index2,final_index2=rayHits(second_ray_payload,distmap2,io_index2,scatter_inc=2,origins_local=second_origin)
            RAYS_CAST = first_wave_Rays + second_wave_rays + third_wave_rays
            # create combined path network
            host_size = np.shape(final_index)
            host_nans = np.zeros((host_size[0], 1), dtype=np.int32)
            temp_total = np.append(
                np.append(final_index, host_nans, axis=1), final_index2, axis=0
            )
            host_size2 = np.shape(temp_total)
            host_nans2 = np.zeros((host_size2[0], 1), dtype=np.int32)
            full_index_temp = np.append(
                np.append(temp_total, host_nans2, axis=1), final_index3, axis=0
            )
            full_index = np.append(full_index, full_index_temp, axis=0)
    # process for scattering matrix
    raycastingduration = timer() - raycasting_timestamp
    # print("Raycasting Duration: {:3.1f} s, Total Rays: {:3.1f}".format(raycastingduration,RAYS_CAST))
    return full_index, RAYS_CAST
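
# --- Illustrative sketch (not part of the library API, requires a CUDA GPU) ---
# Typical use of workchunkingv2 for single-bounce coverage: the geometry
# arrays are placeholders and environment is assumed to be an array of
# base_types.triangle_t prepared elsewhere.
def _sketch_workchunking(environment):
    sources = np.array([[0.0, 0.0, 1.0]], dtype=np.float32)
    sinks = np.array([[10.0, 0.0, 1.0]], dtype=np.float32)
    scattering_points = np.array([[5.0, 2.0, 1.0]], dtype=np.float32)
    full_index, rays_cast = workchunkingv2(
        sources, sinks, scattering_points, environment, max_scatter=2
    )
    # each row of full_index is a successful path: direct paths appear as
    # (source, sink, 0), single-bounce paths as (source, scatterer, sink)
    return full_index, rays_cast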
def create_scatter_index(
    source_index, sink_index, scattering_point_index, scattering_mask
):
    # create target index for given scattering depth
    target_index = np.empty((0, np.max(scattering_mask) + 2), dtype=np.int32)
    # temp_index=create_model_index(source_index, sink_index, scattering_point_index)
    # stop_index=temp_index[np.isin(temp_index[:,-1],sink_index),:]
    # start_index=temp_index[~np.isin(temp_index[:,-1],sink_index),:]
    # final_index=create_model_index(start_index, sink_index, scattering_point_index)
    # target_index=np.append(np.append(stop_index,np.zeros((stop_index.shape[0],1),dtype=np.int32),axis=1),final_index,axis=0)
    for launch_round in range(np.max(scattering_mask) + 1):
        # create list of allowed points, which always includes all sinks
        if launch_round == 0:
            allowed_illuminators = source_index
            # allowed_points=np.append(np.where(scattering_mask>=(launch_round+1))[0]+1,(np.where(scattering_mask==0)[0]+1))
            # allowed_points=copy.deepcopy(sink_index).ravel()
            allowed_scatter_points = (
                np.where(scattering_mask >= (launch_round + 1))[0] + 1
            )
            temp_index = create_model_index(
                allowed_illuminators,
                sink_index,
                allowed_scatter_points.reshape(allowed_scatter_points.shape[0], 1),
            )
            target_index = np.append(
                target_index,
                np.append(
                    temp_index,
                    np.zeros(
                        (
                            temp_index.shape[0],
                            target_index.shape[1] - temp_index.shape[1],
                        ),
                        dtype=np.int32,
                    ),
                    axis=1,
                ),
                axis=0,
            )
        else:
            allowed_illuminators = (
                np.where(scattering_mask >= (launch_round + 1))[0] + 1
            )
            # allowed_points=np.append(np.where(scattering_mask>=(launch_round+1))[0]+1,(np.where(scattering_mask==0)[0]+1))
            allowed_scatter_points = scattering_point_index
            propagate_index = np.where(
                target_index[:, launch_round] > np.max(sink_index)
            )[0]
            drain_trim = np.delete(
                propagate_index,
                np.isin(
                    target_index[propagate_index, launch_round], allowed_illuminators
                ),
            )
            drain_temp = create_model_index(
                target_index[drain_trim, 0 : launch_round + 1],
                sink_index,
                np.empty((0)),
            )
            illuminator_index = propagate_index[
                np.isin(
                    target_index[propagate_index, launch_round], allowed_illuminators
                )
            ]
            forward_temp = create_model_index(
                target_index[illuminator_index, 0 : launch_round + 1],
                sink_index,
                allowed_scatter_points.reshape(allowed_scatter_points.shape[0], 1),
            )
            trimmed_index = np.delete(target_index, propagate_index, axis=0)
            target_index = np.append(
                trimmed_index,
                np.append(
                    np.append(drain_temp, forward_temp, axis=0),
                    np.zeros(
                        (
                            drain_temp.shape[0] + forward_temp.shape[0],
                            target_index.shape[1] - drain_temp.shape[1],
                        ),
                        dtype=np.int32,
                    ),
                    axis=1,
                ),
                axis=0,
            )
    return target_index
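
# --- Illustrative sketch (not part of the library API) ---
# The scattering_mask used above appears to span every model point (sources,
# sinks, then scattering points), since create_scatter_index converts mask
# positions directly into 1-based model indices; each value is the number of
# further bounces that point may launch, with 0 meaning the point only
# receives energy. The scene below is invented: of four scattering points
# (model indices 3..6), only the middle two may keep propagating.
def _sketch_scatter_index():
    source_index = np.array([[1]], dtype=np.int32)
    sink_index = np.array([[2]], dtype=np.int32)
    scattering_point_index = np.array([[3], [4], [5], [6]], dtype=np.int32)
    scattering_mask = np.array([0, 0, 0, 1, 1, 0], dtype=np.int32)
    target_index = create_scatter_index(
        source_index, sink_index, scattering_point_index, scattering_mask
    )
    # target_index -> [[1, 2, 0], [1, 4, 2], [1, 5, 2]]: the direct path plus
    # one single-bounce path through each unmasked scatterer
    return target_index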
def integratedraycastersetup(
    sources, sinks, scattering_points, environment, scattering_mask
):
    # temp function to chunk the number of rays to prevent creation of ray
    # arrays too large for memory. Here sources and sinks are counts, and
    # scattering_points contains the full model point set.
    raycasting_timestamp = timer()
    max_scatter = np.max(scattering_mask)
    if max_scatter == 0:
        ray_estimate = sources * sinks
    elif max_scatter == 1:
        ray_estimate = (
            sources * (sinks + scattering_points.shape[0])
            + sources * scattering_points.shape[0] * sinks
        )
    elif max_scatter == 2:
        ray_estimate = (
            sources * (sinks + scattering_points.shape[0])
            + sources * scattering_points.shape[0] * sinks
            + sources
            * scattering_points.shape[0]
            * (scattering_points.shape[0])
            * sinks
        )
    # print("Total of {:3.1f} rays required".format(ray_estimate))
    # establish index boundaries
    source_index = np.arange(1, sources + 1).reshape(
        sources, 1
    )  # index starts at 1, leaving 0 as a null
    sink_index = np.arange(sources + 1, sources + 1 + sinks).reshape(sinks, 1)
    scattering_point_index = np.arange(
        np.max(sink_index) + 1,
        np.max(sink_index) + 1 + (scattering_points.shape[0] - sources - sinks),
    ).reshape((scattering_points.shape[0] - sources - sinks), 1)
    # implement chunking
    full_index = np.empty((0, np.max(scattering_mask) + 2), dtype=np.int32)
    chunknum = np.minimum(sources, int(np.ceil(ray_estimate / 2e8)))
    chunknum = np.maximum(2, chunknum)
    source_chunking = np.linspace(0, sources, chunknum, dtype=np.int32)
    for chunkindex in range(source_chunking.size - 1):
        io_indexing = create_scatter_index(
            source_index[source_chunking[chunkindex] : source_chunking[chunkindex + 1]],
            sink_index,
            scattering_point_index,
            scattering_mask,
        )
        # io_indexing=create_model_index(source_index[source_chunking[chunkindex]:source_chunking[chunkindex+1]],sink_index,np.empty((0,1),dtype=np.int32))
        full_index = np.append(
            full_index,
            integratedRaycaster(io_indexing, scattering_points, environment),
            axis=0,
        )
    # full_index=create_scatter_index(source_index,sink_index,scattering_point_index,scattering_mask)
    raycastingduration = timer() - raycasting_timestamp
    # print("Raycasting Duration: {:3.1f} s, Total Rays: {:3.1f}".format(raycastingduration,full_index.shape[0]))
    return full_index, io_indexing


if __name__ == "__main__":
    print("this is a library")