1+ #
2+ # Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
3+ # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+ #
5+ # This code is free software; you can redistribute it and/or modify it
6+ # under the terms of the GNU General Public License version 3 only, as
7+ # published by the Free Software Foundation.
8+ #
9+ # This code is distributed in the hope that it will be useful, but WITHOUT
10+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+ # version 3 for more details (a copy is included in the LICENSE file that
13+ # accompanied this code).
14+ #
15+ # You should have received a copy of the GNU General Public License version
16+ # 3 along with this work; if not, write to the Free Software Foundation,
17+ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+ #
19+ # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+ # or visit www.oracle.com if you need additional information or have any
21+ # questions.
22+ #
23+
#' Run your R code faster with FastR!
#'
#' @description
#' FastR is an alternative implementation of the R programming language,
#' which provides superb performance for computation intensive longer
#' running jobs, but takes a bit more time to warm up. The performance
#' of FastR is especially good with pure R code with loops, but it can
#' also deal with C/C++/Fortran code in R packages.
#'
#' Package \emph{fastRCluster} lets you run FastR inside GNU-R via PSOCK cluster.
#' With this package, you can move your performance critical R algorithms to FastR,
#' but keep the rest of your code-base on GNU-R. You can also use this package
#' to gradually move all your code to FastR.
#'
#' We recommend using fastRCluster as a back-end for the \emph{future} package.
#' Keep your configuration of the \emph{future} package isolated from the rest
#' of the system to be able to simply switch between FastR and other back-ends.
#'
#' @details
#' This package does not come with pre-installed FastR. However, FastR can be
#' installed using the \code{installFastR} function. Once FastR is installed,
#' you can create cluster nodes that delegate to FastR using \code{makeFastRCluster}.
#' Note: like with PSOCK cluster, you have to install required packages on the FastR engine.
#' You can use \code{fastRClusterInstallPackages} to install the necessary packages.
#'
#' FastR leverages dynamic just-in-time compilation. R functions are first interpreted
#' and then compiled. The first few executions are much slower. In order to re-use
#' the compiled code as much as possible, it is a good idea to first transfer all the
#' necessary R functions to the cluster nodes using \code{clusterExport}.
#' If you send a large and computation heavy R function via, e.g., \code{clusterApply},
#' it will always be deserialized to a different function on the other end in FastR and hence
#' no compiled code will be reused.
#'
#' @examples
#' library(fastRCluster)
#'
#' # downloads and installs FastR, note: this may take a while
#' installFastR()
#'
#' # create a cluster backed by FastR
#' cl <- makeFastRCluster()
#' print(cl)
#' # prints: FastR socket cluster with 1 nodes on host ‘localhost’
#'
#' # install required packages on FastR
#' fastRClusterInstallPackages('rlang')
#'
#' # use the cluster package with FastR
#' # R.version will show that we are running that code on FastR
#' parallel::clusterApply(cl, 'dummy', function(...) R.version)
#'
#' # transfer data and a helper function to the global environment of the cluster nodes
#' largeDataSet <- matrix(runif(1000000), 1000, 1000)
#' myComputation <- function(x) {
#'     x <- x/sum(x)
#'     res <- 0
#'     colsums <- colSums(x)
#'     rowsums <- rowSums(x)
#'     for(i in seq_len(nrow(x))){
#'         for(j in seq_len(ncol(x))){
#'             temp <- log((x[i,j]/(colsums[j]*rowsums[i])))
#'             res <- res + x[i,j] * if(is.finite(temp)) temp else 0
#'         }
#'     }
#'     res
#' }
#' parallel::clusterExport(cl, c('largeDataSet', 'myComputation'))
#' # now you can refer to 'largeDataSet' and 'myComputation'
#' parallel::clusterApply(cl, 'dummy', function(...) myComputation(largeDataSet))
#'
#' # use the future package with FastR
#' # (require() is used on purpose: it attaches 'future' so that %<-% is visible)
#' if (require(future)) {
#'     future::plan(future::cluster, workers = makeFastRCluster())
#'     val %<-% R.version
#'     print(val)
#' }
#'
#' @keywords internal
"_PACKAGE"
103+
#' Default GraalVM installation path
#'
#' Gives the path to the default location of GraalVM installation that includes FastR.
#' The default location is the \code{graalvm} sub-directory of the directory where
#' the fastRCluster package was installed.
#'
#' \code{\link{getGraalVMHome}()} uses this value as the default,
#' if no other value is explicitly configured via R options or an environment variable.
#'
#' @return The default GraalVM installation path
#' @seealso \code{\link{getGraalVMHome}}
#' @export
defaultGraalVMHome <- function() {
  # The package name must match exactly (no surrounding whitespace),
  # otherwise find.package() cannot resolve the installation directory.
  fastrPkgHome <- find.package("fastRCluster")
  file.path(fastrPkgHome, "graalvm")
}
119+
#' Currently configured GraalVM path
#'
#' Gives the path that is used as a default value of the \code{graalVMHome} parameter
#' for most of the functions in the fastRCluster package.
#'
#' The value is taken from (in this order)
#' \enumerate{
#'   \item R option "graalvm.home"
#'   \item environment variable \code{GRAALVM_HOME} (an empty value is treated as not set)
#'   \item \code{\link{defaultGraalVMHome}()}
#' }
#'
#' @return The currently configured path to GraalVM installation.
#' @seealso \code{\link{defaultGraalVMHome}}
#' @export
getGraalVMHome <- function() {
  fromOption <- getOption("graalvm.home")
  if (!is.null(fromOption)) {
    return(fromOption)
  }

  fromEnv <- Sys.getenv("GRAALVM_HOME", unset = NA_character_)
  if (!is.na(fromEnv) && nzchar(fromEnv)) {
    return(fromEnv)
  }

  # Only computed when nothing is configured explicitly, so an explicit
  # configuration works even if the package directory cannot be located.
  defaultGraalVMHome()
}
136+
#' Installs FastR
#'
#' Downloads GraalVM Community Edition and installs the R ("FastR") component for GraalVM.
#'
#' Note: the download is around 300MB. The installation usually takes a few seconds.
#' If the given directory already contains GraalVM, this function installs the R ("FastR") component.
#' If it already contains GraalVM with FastR, nothing is done. If the directory exists,
#' is not empty and does not contain GraalVM, an error is raised.
#'
#' The GraalVM CE 19.0.2 archive is downloaded: the macOS (darwin-amd64) build when
#' \code{Sys.info()[["sysname"]]} is "Darwin", the linux-amd64 build otherwise.
#'
#' @param path Path to a directory where GraalVM should be installed. Defaults to \code{\link{defaultGraalVMHome}()}.
#' @return the path where GraalVM was installed if successful (invisible), otherwise this function raises an error.
#' @seealso \code{\link{defaultGraalVMHome}}
#' @export
installFastR <- function(path = defaultGraalVMHome()) {
  toRemove <- character(0)
  # The expression is evaluated on exit, so it sees the final value of 'toRemove';
  # unlink() silently ignores files that do not exist.
  on.exit(unlink(toRemove, recursive = TRUE, force = TRUE), add = TRUE)

  if (file.exists(file.path(path, "bin", "Rscript"))) {
    message(sprintf("The directory '%s' appears to already contain GraalVM installation with FastR. Doing nothing.", path))
    return(invisible(path))
  }

  if (file.exists(file.path(path, "bin", "gu"))) {
    message(sprintf("The directory '%s' appears to already contain GraalVM installation. FastR will be installed in it.", path))
  } else {
    if (!file.exists(path)) {
      message(sprintf("The path '%s' does not exist. Creating it.", path))
      if (!dir.create(path, recursive = TRUE)) {
        stop(sprintf("Could not create the directory '%s'. Is its parent directory writeable?", path), call. = FALSE)
      }
    } else if (length(list.files(path, all.files = TRUE, no.. = TRUE)) > 0L) {
      # Extracting into a non-empty directory cannot work (the final rename would fail),
      # so fail early, before the large download.
      stop(sprintf("The directory '%s' is not empty. Choose different directory or remove its contents.", path), call. = FALSE)
    }

    tarFile <- tempfile(fileext = ".tar.gz")
    toRemove <- tarFile
    url <- if (Sys.info()[["sysname"]] == "Darwin") {
      "https://github.com/oracle/graal/releases/download/vm-19.0.2/graalvm-ce-darwin-amd64-19.0.2.tar.gz"
    } else {
      "https://github.com/oracle/graal/releases/download/vm-19.0.2/graalvm-ce-linux-amd64-19.0.2.tar.gz"
    }
    downloadRes <- download.file(url, tarFile, mode = "wb")
    if (downloadRes != 0L) {
      stop(sprintf("Downloading GraalVM from '%s' failed. Error code: %d.", url, as.integer(downloadRes)), call. = FALSE)
    }

    # The archive contains a single top-level directory with a version specific name.
    # It is extracted next to 'path' and then renamed to 'path'.
    workDir <- dirname(path)
    origFiles <- list.files(workDir)
    untarRes <- untar(tarFile, exdir = workDir)
    if (untarRes != 0L) {
      stop(sprintf("An error occurred when extracting GraalVM files to '%s'. Is this directory writeable? Error code: %d.", workDir, as.integer(untarRes)), call. = FALSE)
    }
    graalVMOrigDir <- setdiff(list.files(workDir), origFiles)
    # Remove whatever was extracted if anything below fails; after a successful
    # rename these paths no longer exist and unlink() ignores them.
    toRemove <- c(toRemove, file.path(workDir, graalVMOrigDir))
    if (length(graalVMOrigDir) != 1L) {
      stop(sprintf("Could not identify the extracted GraalVM directory in '%s' (found %d new entries).", workDir, length(graalVMOrigDir)), call. = FALSE)
    }
    renamed <- file.rename(file.path(workDir, graalVMOrigDir), file.path(workDir, basename(path)))
    if (!renamed) {
      stop(sprintf("An error occurred when moving GraalVM files to '%s'. Is this directory writeable?", path), call. = FALSE)
    }
  }

  guRes <- system2(file.path(path, "bin", "gu"), args = c("install", "R"))
  if (guRes != 0) {
    stop("An error occurred during installation of FastR. Please report at https://github.com/oracle/fastr.", call. = FALSE)
  }
  invisible(path)
}
187+
#' Installs packages on the FastR engine
#'
#' Starts a temporary single-node FastR cluster, runs \code{install.packages}
#' on it with the given arguments and stops the cluster again.
#'
#' @param ... Parameters passed to the R function \code{install.packages} that is run on the FastR engine.
#' @return Invisible \code{NULL}
#' @export
fastRClusterInstallPackages <- function(...) {
  # 'methods' is forwarded through makeFastRCluster(...) to parallel::makePSOCKcluster
  cl <- makeFastRCluster(1L, methods = FALSE)
  on.exit(parallel::stopCluster(cl), add = TRUE)
  # The arguments are shipped as a single list element, so that the one node
  # receives all of them and applies them to install.packages in one call.
  parallel::clusterApply(cl, list(list(...)), function(args) do.call(utils::install.packages, args))
  invisible(NULL)
}
199+
#' Creates cluster nodes that delegate to FastR
#'
#' FastR is an alternative implementation of the R programming language,
#' which provides superb performance for computation intensive and longer
#' running jobs, but takes a bit more time to warm up.
#'
#' @param nnodes Number of nodes to be created.
#' @param graalVMHome Path to the installation directory of GraalVM and FastR. Default value is obtained from \code{getGraalVMHome()}.
#' @param mode Mode in which to run FastR. See the FastR documentation on the details on the difference between jvm and native modes.
#' @param polyglot Run FastR in a polyglot mode: other installed GraalVM languages will be available via \code{eval.polyglot}. See \code{installGraalVMLanguage}. Allowed only for mode 'jvm' (the default).
#' @param fastROptions Additional options for the FastR engine. The options \code{--jvm}, \code{--native}
#'   and \code{--polyglot} are ignored with a warning; use the \code{mode} and \code{polyglot} arguments instead.
#' @param ... Additional options forwarded to \code{makePSOCKcluster}
#' @return The cluster object that can be passed to functions like \code{parallel::clusterApply}.
#' @seealso \code{\link{getGraalVMHome}}
#' @export
#' @examples
#' cl <- makeFastRCluster()
#' parallel::clusterApply(cl, 'dummy', function(...) R.version)
makeFastRCluster <- function(nnodes = 1L, graalVMHome = getGraalVMHome(), mode = c("jvm", "native"), polyglot = FALSE, fastROptions = NULL, ...) {
  nnodes <- as.integer(nnodes)
  if (length(nnodes) != 1L || is.na(nnodes) || nnodes < 1L) {
    stop("'nnodes' must be >= 1")
  }
  # note: no explicit core-limit check here; parallel::makePSOCKcluster performs
  # that check on the number of requested nodes itself.

  if (!dir.exists(graalVMHome)) {
    if (identical(graalVMHome, defaultGraalVMHome())) {
      stop(sprintf(paste0("It seems that FastR was not installed yet.\n",
                          "Use installFastR() to install GraalVM and FastR to the default location '%s', ",
                          "or set argument 'graalVMHome' to a directory that contains GraalVM and FastR installation."),
                   defaultGraalVMHome()))
    } else {
      stop(sprintf(paste0("The GraalVM directory '%s' does not exist.\n",
                          "Use installFastR('%s') to install GraalVM and FastR to that directory."),
                   graalVMHome, graalVMHome))
    }
  }
  if (!file.exists(file.path(graalVMHome, "bin", "gu"))) {
    stop(sprintf("The GraalVM directory '%s' appears to be corrupt.\nYou can remove it and use installFastR('%s') to re-install GraalVM and FastR.", graalVMHome, graalVMHome))
  }
  if (!file.exists(file.path(graalVMHome, "bin", "Rscript"))) {
    stop(sprintf("The GraalVM installation '%s' does not contain FastR.\nUse installFastR('%s') to install FastR.", graalVMHome, graalVMHome))
  }

  if (any(c("--jvm", "--native") %in% fastROptions)) {
    warning("Ignoring --jvm/--native in 'fastROptions' argument. Use the 'mode' argument instead.")
  }
  if ("--polyglot" %in% fastROptions) {
    warning("Ignoring --polyglot in 'fastROptions' argument. Use the 'polyglot' argument instead.")
  }

  mode <- match.arg(mode)
  # Drop the flags that are controlled by dedicated arguments and keep all the
  # other user supplied options (NULL stays NULL).
  reservedFlags <- c("--jvm", "--native", "--polyglot")
  launcherArgs <- fastROptions[!(fastROptions %in% reservedFlags)]
  if (polyglot) {
    if (mode != "jvm") {
      stop("polyglot is only available when mode = 'jvm'")
    }
    launcherArgs <- c("--polyglot", launcherArgs)
  }
  # The mode flag goes first: '--jvm' or '--native'
  launcherArgs <- c(paste0("--", mode), launcherArgs)

  result <- parallel::makePSOCKcluster(nnodes,
                                       rscript = file.path(graalVMHome, "bin", "Rscript"),
                                       rscript_args = launcherArgs, ...)
  # Prepend our class so that print() dispatches to print.fastRCluster first
  class(result) <- c("fastRCluster", class(result))
  result
}
267+
#' Prints a FastR cluster
#'
#' Writes the prefix "FastR " and then delegates to the print method of the
#' next class of the object, so the output reads e.g. "FastR socket cluster ...".
#'
#' @param x The cluster object created by \code{makeFastRCluster}.
#' @param ... Further arguments; they reach the next print method unchanged.
#' @return \code{x}, invisibly.
#' @export
print.fastRCluster <- function(x, ...) {
  cat("FastR ")
  # NextMethod() already forwards the arguments of the current call; its first
  # formal is the generic name, so the object must not be passed positionally.
  NextMethod()
  invisible(x)
}
0 commit comments