### Name: tarExtract
### Title: Extract the contents of entries in a gzipped tar file
### Aliases: tarExtract
### Keywords: IO

### ** Examples


  filename <- system.file("sampleData", "OmegahatXSL_0.2-0.tar.gz",
                          package = "Rcompression")

     # Pull the contents of two named entries out of the archive.
  raws <- tarExtract(filename,
                     c("OmegahatXSL/XSL/env.xsl", "OmegahatXSL/XSL/Todo.xsl"))
     # We know these entries are text, so turn the raw vectors
     # into character strings.
  sapply(raws, rawToChar)

    # The same thing in a single call, using the convert argument.
  raws <- tarExtract(filename,
                     c("OmegahatXSL/XSL/env.xsl", "OmegahatXSL/XSL/Todo.xsl"),
                     convert = rawToChar)

     # Extract files in a directory.
  filename <- system.file("sampleData", "OmegahatXSL_0.2-0.tar.gz",
                          package = "Rcompression")
  i <- tarInfo(filename)

     # Check there is such a directory.  The comparison is element-wise
     # (one value per archive entry), so any() is used to collapse it
     # to a single TRUE/FALSE answer.
  any(i$type == "DIRTYPE" & i$file == "OmegahatXSL/XSL/")

     # Select the entries whose parent directory is OmegahatXSL/XSL,
     # extract them as text and report the size of each one.
  files <- i$file[dirname(i$file) == "OmegahatXSL/XSL"]
  z <- tarExtract(filename, files, convert = rawToChar)
  nchar(z)

    # This example illustrates how we can process the contents of each
    # file as it is extracted.
    # The particular computation is uninteresting but the approach is intended
    # to illustrate that we can extract some information from the
    # contents and put it somewhere and move on to the next file. This
    # is useful if the archive has data across multiple files that can
    # be dynamically merged into a single R data structure.
 
  filename <- system.file("sampleData", "OmegahatXSL_0.2-0.tar.gz",
                          package = "Rcompression")
    # Accumulator that countLines() fills in, one named element per file.
  lineCounts <- numeric()
  countLines <-
     # Count the lines in the contents of one archive entry.
     #
     #  contents  raw vector holding the entry's bytes (passed to rawToChar()).
     #  fileName  name under which the count is stored in lineCounts.
     #  verbose   if TRUE, print fileName before counting.
     #
     # Returns the number of newline-separated pieces in the text and, as a
     # side effect, records it in the lineCounts vector, which must already
     # exist in an enclosing environment.
     function(contents, fileName = "", verbose = TRUE) {
        if (verbose)
           cat(fileName, "\n")

        text <- rawToChar(contents)
        pieces <- strsplit(text, "\\n")[[1]]
        numLines <- length(pieces)

           # Store the result outside the function so that it survives the call.
        lineCounts[fileName] <<- numLines
        numLines
     }
  i <- tarInfo(filename)
    # Keep every entry that is not a directory.
  files <- i$file[!(i$type %in% "DIRTYPE")]

    # Now we are ready to run the code.
  tarExtract(filename, files, countLines)

    # Alternatively, collect all the information and then
    # convert each one in turn at the end.
    # This is only marginally faster, if at all, and consumes
    # a lot more memory because, when we perform the conversion,
    # we have all of the contents in memory.
    # One measurement of speed was 38 seconds to 39.

    # With the changes to avoid the accordion growth of the raw
    # vector for each chunk of file, the comparison
    # is .969 versus .537.  So much faster overall, and this
    # version becomes relatively quicker.  But consumes more memory.

  tarExtract(filename, files, convert = countLines, verbose = FALSE)

    # Size of the largest entry in the archive.
  max(i$size)

  # Dealing with raw data rather than a file.
 filename <- system.file("sampleData", "OmegahatXSL_0.2-0.tar.bz2",
                         package = "Rcompression")
   # Read up to 1000000 bytes of the archive through a bzip2 connection
   # and work with the resulting raw vector instead of the file.
 f <- bzfile(filename, open = "rb")
 data <- readBin(f, what = "raw", n = 1000000)
 close(f)

 tarInfo(data)

 targetFiles <- c("OmegahatXSL/XSL/env.xsl", "OmegahatXSL/XSL/Todo.xsl")
 raws <- tarExtract(data, targetFiles, convert = rawToChar)

   # The same approach for the gzipped version of the archive.
 filename <- system.file("sampleData", "OmegahatXSL_0.2-0.tar.gz",
                         package = "Rcompression")
 f <- gzfile(filename, open = "rb")
 data <- readBin(f, what = "raw", n = 1000000)
 close(f)

 tarInfo(data)



