library(RCIndex)
library(RCodeGen)
source("nativeGen.R")

#Identify routines that are deprecated

 # Add "." to the include path to pick up our local copy of cuda.h.  Make
 # certain to update that copy when we get a new SDK.  It was copied here so
 # that const declarations could be added to some parameters.
 # Perhaps we could set the const on the clang objects programmatically instead.

# Include directories handed to clang; "." is listed so that the local copy
# of cuda.h is found.
incs <- c(".", "/usr/local/cuda/include")

# When curand_kernel.h is included clang reports a flood of errors, so the
# error limit is raised far above the default.
tu <- createTU(
  "tu.c",
  includes = incs,
  args = c("-ferror-limit=10000", "-fparse-all-comments"),
  verbose = FALSE
)

# Every routine found in the translation unit.
# TODO: should filter by name here, not below.
r <- getRoutines(tu, FALSE)

# File name associated with each routine's definition.
fn <- sapply(r, function(routine) getFileName(routine@def))

# Keep the routines whose file name matches one of the include directories.
# The directories are joined into a single alternation; each "." in an entry
# is first replaced by the regex fragment \./ (a literal "./").
# TODO: have to handle . in getwd().
r.cu <- r[
  grepl(
    sprintf("(%s)", paste(gsub("\\.", "\\\\./", incs), collapse = "|")),
    fn
  )
]

# Selected routines whose names do not start with "cu".
# All are make_...  54 of them.  Now with curand.h we get additional entries.
names(r.cu)[!grepl("^cu", names(r.cu))]

# Flag the routines whose return type is CUresult, cudaError_t or
# enum cudaError.
status <- sapply(
  r.cu,
  function(routine) {
    getName(routine@returnType) %in% c("CUresult", "cudaError_t", "enum cudaError")
  }
)
table(status)

# For cuda and not cublas, there are only 2 that don't return
# CUresult/cudaError_t: cudaGetErrorString and cudaCreateChannelDesc().
names(r.cu)[!status]


# Data structures found in the translation unit, restricted in the same way
# as the routines: keep those whose file name matches one of the include
# directories in incs.  Note that fn is overwritten here with the file names
# of the data structures.
ds <- getDataStructures(tu)
fn <- sapply(ds, function(d) getFileName(d$def))
ds.cu <- ds[
  grepl(
    sprintf("(%s)", paste(gsub("\\.", "\\\\./", incs), collapse = "|")),
    fn
  )
]


###############


# Enumerations found in the translation unit.
enums <- getEnums(tu)


###################################################################################################
#
# Context routines
# Names of the routines starting with "cuCtx", leaving out the two deprecated
# ones (cuCtxAttach and cuCtxDetach).
ctx <- setdiff(
  grep("^cuCtx", names(r.cu), value = TRUE),
  c("cuCtxAttach", "cuCtxDetach")
)
# What about cuDeviceCanAccessPeer - not a context thing, in cuDevice

# NOTE(review): the context routines are generated under the tag "Device",
# the same tag used for the cuDevice routines later in this script.  Confirm
# against generateCode() whether that is intended or whether the second call
# replaces this output.
generateCode(r.cu[ctx], "Device")

################

# Names of the routines starting with "cuDevice" or "cudaDevice", excluding
# cuDeviceComputeCapability and cuDeviceGetProperties.
# Ignore GetName for now.  Other candidates for exclusion, currently kept:
#   "cuDeviceGetName", "cuDeviceGetByPCIBusId", "cuDeviceGetPCIBusId"
dev <- setdiff(
  grep("^cu(da)?Device", names(r.cu), value = TRUE),
  c("cuDeviceComputeCapability", "cuDeviceGetProperties")
)

generateCode(r.cu[dev], "Device")

######

# Names of the routines starting with "cuModule", excluding
# cuModuleLoadDataEx.
mod <- setdiff(
  grep("^cuModule", names(r.cu), value = TRUE),
  c("cuModuleLoadDataEx")
)
#cuda.createNativeProxy(r.cu$cuModuleLoad)

generateCode(r.cu[mod], "Module")


#######
# Routines whose names start with "cuFunc".
func <- grep("^cuFunc", names(r.cu), value = TRUE)
generateCode(r.cu[func], "Function")

########

# Routines whose names start with "cuEvent".
ev <- grep("^cuEvent", names(r.cu), value = TRUE)
generateCode(r.cu[ev], "Event")

#########

# Routines whose names start with "cuStream".  The variable ev is reused, so
# after this point it holds the stream routine names, not the event ones.
ev <- grep("^cuStream", names(r.cu), value = TRUE)
generateCode(r.cu[ev], "Stream")


############

# Names of the routines starting with "cuMem" or "cudaMem".  (An earlier
# assignment that matched only "^cuMem" was immediately overwritten by this
# broader pattern and has been dropped.)
#
# cuMemGetInfo works fine when autogenerated, but we have a manual version
# that simplifies the result to a numeric vector, so it is excluded here.
# Could identify this programmatically or try to simplify in the R code.
mem <- setdiff(
  grep("^cu(da)?Mem", names(r.cu), value = TRUE),
  "cuMemGetInfo"
)
#lapply(r.cu[mem], cuda.createNativeProxy)
generateCode(r.cu[mem], "Memory")

# Other memory related routines: "Mem" appears later in the name, after "cu"
# and at least one more character.
grep("^cu.+Mem", names(r.cu), value = TRUE)

# [Done] event, stream, 
# memory - richer types e.g. array,...
# texture, surface



###############
# Select the data structures that are named, non-empty struct declarations.
#
# An earlier selection (names starting with "cu"/"CU", minus enums and
# field-less types) was computed here and then unconditionally overwritten by
# the assignment below; that dead code has been removed.
#
# vapply() guarantees a logical vector even when ds.cu is empty, so the
# subsetting below cannot receive a list() index.
structs <- vapply(
  ds.cu,
  function(d) {
    d$def$kind == CXCursor_StructDecl &&
      length(d$fields) > 0 &&
      getName(d$def) != ""
  },
  logical(1)
)
ds.cuTypes <- ds.cu[structs]
#XXX Avoid duplicate names, e.g. for typedef and struct.
# All structs.

# Generate the C++ routine R_getSizeofStructs(), which returns an integer
# vector holding sizeof() for each struct in ds.cuTypes, with the struct
# names as the names of the vector.
#
# The counter i indexes both the result and the names vector, so it has to
# start at 0.  The previous template declared it without an initialiser,
# which made the generated code read an indeterminate value.
sizeofCode <- CRoutineDefinition(
  "R_getSizeofStructs",
  c(
    'extern "C"',
    "SEXP R_getSizeofStructs()",
    "{",
    "SEXP r_ans, names;",
    sprintf("unsigned int i = 0, n = %d;", length(ds.cuTypes)),
    "PROTECT(r_ans = NEW_INTEGER(n));",
    "PROTECT(names = NEW_CHARACTER(n));",
    "",
    # One pair of statements per struct: store the size, then the name, and
    # advance i.
    sprintf(
      'INTEGER(r_ans)[i] = sizeof(%s);\n    SET_STRING_ELT(names, i++, mkChar("%s"));',
      sapply(ds.cuTypes, function(d) getName(getType(d$def))),
      names(ds.cuTypes)
    ),
    "",
    "SET_NAMES(r_ans, names);",
    "UNPROTECT(2);",
    "return(r_ans);",
    "}"
  )
) #, declaration = "SEXP R_getSizeofStructs()")

# Write the routine out together with RCUDA.h and the headers that the
# structs come from (third argument; presumably the #include list - confirm
# against writeCode()).  Duplicates are removed after taking the basename so
# that the same header name is never listed twice.
writeCode(
  as(sizeofCode, "character"),
  "../src/sizeofStructs.cpp",
  c(
    '"RCUDA.h"',
    sprintf(
      "<%s>",
      unique(basename(sapply(ds.cuTypes, function(d) getFileName(d$def))))
    )
  )
)

# Types whose sizes are reported: a few basic C types followed by the structs
# selected in ds.cuTypes.
typeNames <- c(
  "int", "long", "short", "char", "float", "double",
  names(ds.cuTypes)
)

# Write ../showSizeofs.cu, containing a routine showSizeofs() that prints an
# R expression of the form
#     CUDAStructSizes = c( <type> = <size>L, ... )
# to stdout.  Every entry except the last is followed by a comma.
# cat() returns NULL, so the variable showSizeofs ends up NULL; the file is
# the real output.
showSizeofs <- cat(
  c(
    sprintf("// autogenerated %s. See TU/clang.R", as.character(Sys.time())),
    "void",
    "showSizeofs()",
    "{",
    'fprintf(stdout, "CUDAStructSizes = c(\\n");',
    sprintf(
      'fprintf(stdout, " %s = %%dL%s\\n", (int) sizeof(%s));',
      typeNames,
      rep(c(",", ""), times = c(length(typeNames) - 1L, 1L)),
      typeNames
    ),
    'fprintf(stdout, ")\\n");',
    "}"
  ),
  sep = "\n",
  file = "../showSizeofs.cu"
)





# Alternative to R_getSizeofStructs(): C source for a lookup routine,
# R_getStructSizeof(), that computes the size of just one struct per call.
# The struct name passed in from R is compared against each known name in an
# if / else-if chain; an unknown name yields NA.
#
# STRING_ELT() takes the vector and an element index, so the first element of
# r_id is extracted with STRING_ELT(r_id, 0); the previous template omitted
# the index and the generated code could not compile.
code <- c(
  "SEXP",
  "R_getStructSizeof(SEXP r_id)",
  "{",
  "const char *target = CHAR(STRING_ELT(r_id, 0));",
  paste(
    sprintf(
      'if(strcmp(target, "%s") == 0) return(ScalarInteger(sizeof(%s)));',
      names(ds.cu[structs]),
      names(ds.cu[structs])
    ),
    collapse = "\nelse "
  ),
  "else return(ScalarReal(NA_REAL));",
  "}"
)


########################################
# Routines excluded from the "missing" set below (presumably because they are
# handled by hand - confirm).
manual <- c("cudaGetExportTable", "cuGetExportTable", "cudaConfigureCall", "cudaSetupArgument")

# Include path for parsing the C sources: the CUDA directories plus the R
# headers.
incs <- c(".", "/usr/local/cuda/include")
rincs <- c(
  incs,
  sprintf("%s/include", R.home()),
  sprintf("%s/../src/include", R.home())
)

# list.files() takes a regular expression, not a glob.  "\\.c$" selects
# exactly the files ending in ".c"; the previous "*.c$" also matched any name
# whose last character was "c" (e.g. "x.cc").
cfiles <- list.files("../src", pattern = "\\.c$", full.names = TRUE)

# NOTE(review): cfiles holds full paths ("../src/..."), so "^auto" can never
# match and this filter currently keeps every file.  Matching on
# basename(cfiles) would exclude the auto* files, but that changes which
# routines end up in missingOnes and hence in "Other" - confirm the intent
# before changing it.
cfiles <- grep("^auto", cfiles, invert = TRUE, value = TRUE)

# Names of everything called from those C files.
kalls <- unlist(lapply(cfiles, findCalls, includes = rincs))

# The distinct called routines whose names start with "cu".
cuCalls <- grep("^cu", unique(kalls), value = TRUE)
deprecated <- scan("../deprecated", what = "", quiet = TRUE)

# Routines that are not called from the C sources, not listed in
# ../deprecated and not in manual.
missingOnes <- setdiff(names(r.cu), c(cuCalls, deprecated, manual))
#missing = setdiff(missing, deprecated)

# For each missing routine: TRUE when its tokens mention "__device" but not
# "__host".  w is not used further in this script.
w <- sapply(
  r.cu[missingOnes],
  function(routine) {
    toks <- getCursorTokens(routine@def)
    any(grepl("__device", toks)) && !any(grepl("__host", toks))
  }
)

# Drop the make*, mipmap, IPC, texture, surface and curand routines and
# generate code for what is left.
want <- grep(
  "(^make|Mip|Ipc|Texture|TexRef|Tex|Surface|Surf|curand)",
  missingOnes,
  invert = TRUE,
  value = TRUE
)

generateCode(r.cu[want], "Other")

##############

# Apply makeCCopyStructCode() to the two array descriptor structs and write
# the resulting code to ../src/structCopy.c as C.
structCode <- lapply(
  list(
    ds.cu$CUDA_ARRAY_DESCRIPTOR,
    ds.cu$CUDA_ARRAY3D_DESCRIPTOR
  ),
  makeCCopyStructCode
)
writeCode(structCode, "../src/structCopy.c", lang = "C")
