## ----results = "asis", echo = FALSE------------------------------------------- # output format should be of the form #> output #> output knitr::opts_chunk$set(collapse = TRUE, comment = "#>") # initialize: load library, make everything deterministic library("mlrCPO") set.seed(123) # get the path of the parent document # path = names(knitr::opts_knit$get("encoding"))[1] base = knitr::opts_knit$get("output.dir") file = sys.frame(min(grep("^knitr::knit$|^knit$", sapply(sys.calls(), function(x) as.character(x)[1]))))$input file = basename(file) path = file.path(base, file) rpath = gsub("\\.[^.]*$", ".R", path) # strip whitespace from lines in tangle (R file) output for lintr knitr::knit_hooks$set(document = function(x) { if (file_test("-f", rpath)) { lines = readLines(rpath) lines = gsub(" *(\n|$)", "\\1", lines) cat(lines, file = rpath, sep = "\n", append = FALSE) } x }) ############################# # do the trans-vignette ToC # ############################# fullfile = file allfiles = list.files(path = base, pattern = ".*\\.Rmd$") stopifnot(file %in% allfiles) # collect information (title, url, main / compact) for each file in vignette dir fileinfolist = list() for (cf in allfiles) { ismain = TRUE if (grepl("^z_", cf)) { infoslot = gsub("^z_", "", cf) infoslot = gsub("_terse\\.Rmd$", "", infoslot) subslot = "compact" } else { infoslot = gsub("^a_", "", cf) infoslot = gsub("\\.Rmd$", "", infoslot) subslot = "main" } content = scan(paste(base, cf, sep = "/"), what = "character", quiet = TRUE) pos = min(c(which(content == "title:"), Inf)) if (is.infinite(pos)) { stop(sprintf("parsing error: %s", cf)) } infolist = list(title = content[pos + 1], url = cf, iscurrent = cf == file) applist = list(infolist) names(applist) = subslot fileinfolist[[infoslot]] = c(fileinfolist[[infoslot]], applist) } # helper function that creates a link for all files except the current one linkify = function(info, title) { if (info$iscurrent) { title } else { sprintf("[%s](%s)", title, gsub("\\.Rmd$", ".html", info$url)) } } # output ToC for (idx in seq_along(fileinfolist)) { content = fileinfolist[[sort(names(fileinfolist))[idx]]] if (!is.null(content$compact)) { if (paste(sub("[0-9]\\. ", "", content$main$title), "(No Output)") != sub("^z ", "", content$compact$title)) { stop(sprintf("File %s and its compact version %s have incompatible titles\nThe compact version must be paste(main_title, \"(No Output)\"). Is: '%s', expected: '%s'", content$main$url, content$compact$url, content$compact$title, paste(content$main$title, "(No Output)"))) } line = sprintf("%s (%s)", linkify(content$main, content$main$title), linkify(content$compact, "compact version")) } else { line = linkify(content$main, content$main$title) } cat(sprintf("%s. %s\n", idx, line)) if (content$main$iscurrent || content$compact$iscurrent) { fullfile = content$main$url } } fullpath = file.path(base, fullfile) ############################# # Optional Document TOC # ############################# # print everything up to level `print.level`. # level is the number of '#' prefixes. The lowest level is usually 2. printToc = function(print.level = 3) { owncontent = readLines(fullpath) tripletic = grepl("^```", owncontent) owncontent = owncontent[cumsum(tripletic) %% 2 == 0] # exclude ```-delimited code headlines = grep("^#+ +", owncontent, value = TRUE) headlevels = nchar(gsub(" .*", "", headlines)) headlines = gsub("^[#]+ +", "", headlines) links = gsub("[^-a-z. ]", "", tolower(headlines)) links = gsub(" +", "-", links) links = gsub("-$", "", links) if (!sum(headlevels <= print.level)) { return(invisible(NULL)) } cat("Table of Contents\n
\n", sep = "") lastlevel = headlevels[1] - 1 for (idx in seq_along(headlines)) { line = headlines[idx] level = headlevels[idx] link = links[idx] if (level > print.level) { next } if (level < headlevels[1]) { stop("First headline level must be the lowest one used, but '", line, "' is lower.") } lvldiff = level - lastlevel if (lvldiff > 1) { stop("Cannot jump headline levels. Error on: ", line) } if (lvldiff > 0) { # higher level -> open a
\n") } ############################# # Some output settings # ############################# options(width = 80) replaceprint = function(ofunc) { force(ofunc) function(x, ...) { cu = capture.output({ret = ofunc(x, ...)}) cu = grep("time: [-+e0-9.]{1,6}", cu, value = TRUE, invert = TRUE) cat(paste(cu, collapse = "\n")) if (!grepl("\n$", tail(cu, 1))) { cat("\n") } ret } } for (pfunc in grep("print\\.", ls(asNamespace("mlr")), value = TRUE)) { ofunc = get(pfunc, asNamespace("mlr")) assign(pfunc, replaceprint(ofunc)) } ## ----eval = TRUE, echo = FALSE, results = 'asis'------------------------------ printToc(4) ## ----------------------------------------------------------------------------- # getCPOClass(cpoPca) # getCPOClass(cpoPca()) # getCPOClass(pid.task %>|% cpoPca()) # getCPOClass(inverter(bh.task %>>% cpoLogTrafoRegr())) # getCPOClass(NULLCPO) ## ----------------------------------------------------------------------------- # print(cpoAsNumeric) # example CPOConstructor # print(cpoAsNumeric, verbose = TRUE) # alternative: !cpoAsNumeric # class(cpoAsNumeric) # getCPOName(cpoPca) # same as getCPOName() of the *constructed* CPO # getCPOClass(cpoPca) ## ----------------------------------------------------------------------------- # names(formals(cpoPca)) ## ----------------------------------------------------------------------------- # (cpo = cpoScale()) # construct CPO with default Hyperparameter values # print(cpo, verbose = TRUE) # detailed printing. Alternative: !cpo # class(cpo) # CPOs that are not compound are "CPOPrimitive" # getCPOClass(cpo) ## ----------------------------------------------------------------------------- # getParamSet(cpo) # getHyperPars(cpo) # setHyperPars(cpo, scale.center = FALSE) # getCPOId(cpo) # setCPOId(cpo, "MYID") # getCPOName(cpo) # getCPOAffect(cpo) # empty, since no affect set # getCPOAffect(cpoPca(affect.pattern = "Width$")) # getCPOConstructor(cpo) # the constructor used to create the CPO # getCPOProperties(cpo) # see properties explanation below # getCPOPredictType(cpo) # getCPOClass(cpo) # getCPOOperatingType(cpo) # Operating on feature, target, retrafoless? ## ----------------------------------------------------------------------------- # getCPOPredictType(cpoResponseFromSE()) # getCPOOperatingType(cpoResponseFromSE()) # getCPOOperatingType(cpoSample()) ## ----------------------------------------------------------------------------- # identicalCPO(cpoScale(scale = TRUE), cpoScale(scale = FALSE)) # identicalCPO(cpoScale(), cpoPca()) ## ----------------------------------------------------------------------------- # head(iris) %>>% cpoPca() # task = applyCPO(cpoPca(), iris.task) # head(getTaskData(task)) ## ----------------------------------------------------------------------------- # scale = cpoScale() # pca = cpoPca() ## ----------------------------------------------------------------------------- # compound = scale %>>% pca # composeCPO(scale, pca) # same # class(compound) # !compound # getCPOName(compound) # getHyperPars(compound) # setHyperPars(compound, scale.center = TRUE, pca.center = FALSE) ## ----error = TRUE------------------------------------------------------------- try({ # getCPOId(compound) # error: no ID for compound CPOs # getCPOAffect(compound) # error: no affect for compound CPOs }) ## ----------------------------------------------------------------------------- # getCPOOperatingType(NULLCPO) # getCPOOperatingType(cpoScale()) # getCPOOperatingType(cpoScale() %>>% cpoLogTrafoRegr() %>>% cpoSample()) ## ----------------------------------------------------------------------------- # as.list(compound) # pipeCPO(as.list(compound)) # chainCPO: (list of CPO) -> CPO # pipeCPO(list()) ## ----------------------------------------------------------------------------- # lrn = makeLearner("classif.logreg") # (cpolrn = cpo %>>% lrn) # the new learner has the CPO hyperparameters # attachCPO(compound, lrn) # attaching compound CPO ## ----------------------------------------------------------------------------- # class(lrn) ## ----------------------------------------------------------------------------- # lrn = cpoLogTrafoRegr() %>>% makeLearner("regr.lm") # model = train(lrn, subsetTask(bh.task, 1:300)) # predict(model, subsetTask(bh.task, 301:500)) ## ----------------------------------------------------------------------------- # trafo = subsetTask(bh.task, 1:300) %>>% cpoLogTrafoRegr() # model = train("regr.lm", trafo) # # newdata = subsetTask(bh.task, 301:500) %>>% retrafo(trafo) # pred = predict(model, newdata) # invert(inverter(newdata), pred) ## ----------------------------------------------------------------------------- # getLearnerCPO(cpolrn) # the CPO # getLearnerBare(cpolrn) # the Learner ## ----------------------------------------------------------------------------- # transformed = iris %>>% cpoScale() # head(transformed) # (ret = retrafo(transformed)) ## ----------------------------------------------------------------------------- # head(getTaskTargets(bh.task)) # transformed = bh.task %>>% cpoLogTrafoRegr() # head(getTaskTargets(transformed)) # (inv = inverter(transformed)) # head(invert(inv, getTaskTargets(transformed))) ## ----------------------------------------------------------------------------- # attributes(transformed) ## ----------------------------------------------------------------------------- # bh2 = bh.task # retrafo(bh2) = ret # attributes(bh2) ## ----------------------------------------------------------------------------- # retrafo(bh2) = NULLCPO # # equivalent: # # retrafo(bh2) = NULL # attributes(bh2) ## ----------------------------------------------------------------------------- # # clearRI returns the object without retrafo or inverter attributes # bh3 = clearRI(transformed) # attributes(bh3) ## ----------------------------------------------------------------------------- # getCPOName(ret) # getParamSet(ret) # getHyperPars(ret) # getCPOProperties(ret) # getCPOPredictType(ret) # getCPOOperatingType(ret) # Operating on feature, target, both? # getCPOOperatingType(inv) ## ----------------------------------------------------------------------------- # getCPOTrainedCapability(ret) # getCPOTrainedCapability(inv) # getCPOTrainedCapability(NULLCPO) ## ----------------------------------------------------------------------------- # getCPOClass(ret) # getCPOClass(inv) ## ----------------------------------------------------------------------------- # getCPOTrainedCPO(ret) # getCPOConstructor(ret) ## ----------------------------------------------------------------------------- # (state = getCPOTrainedState(retrafo(iris %>>% cpoScale()))) # state$control$center[1] = 1000 # will now subtract 1000 from the first column # new.retrafo = makeCPOTrainedFromState(cpoScale, state) # head(iris %>>% new.retrafo) ## ----------------------------------------------------------------------------- # data = head(iris) %>>% cpoPca() # retrafo(data) # data2 = data %>>% cpoScale() ## ----------------------------------------------------------------------------- # retrafo(data2) ## ----------------------------------------------------------------------------- # data = clearRI(data) # data2 = data %>>% cpoScale() # retrafo(data2) ## ----------------------------------------------------------------------------- # retrafo(data) = NULL # inverter(data) = NULL # data3 = data %>>% cpoScale() # retrafo(data3) ## ----------------------------------------------------------------------------- # compound.retrafo = retrafo(head(iris) %>>% compound) # compound.retrafo ## ----------------------------------------------------------------------------- # (retrafolist = as.list(compound.retrafo)) ## ----------------------------------------------------------------------------- # retrafolist[[1]] %>>% retrafolist[[2]] # pipeCPO(retrafolist) ## ----------------------------------------------------------------------------- # transformed = iris %>>% cpoScale() # head(iris) %>>% retrafo(transformed) ## ----------------------------------------------------------------------------- # head(transformed) ## ----eval = FALSE------------------------------------------------------------- # applyCPO(retrafo(transformed), head(iris)) # predict(retrafo(transformed), head(iris)) ## ----------------------------------------------------------------------------- # transformed = bh.task %>>% cpoLogTrafoRegr() # prediction = predict(train("regr.lm", transformed), transformed) # inv = inverter(transformed) # invert(inv, prediction) ## ----------------------------------------------------------------------------- # ret = retrafo(transformed) # invert(ret, prediction) ## ----------------------------------------------------------------------------- # pipeCPO(list()) # as.list(NULLCPO) # the inverse of pipeCPO # retrafo(bh.task) # inverter(bh.task %>>% cpoPca()) # cpoPca is a TOCPO, so no inverter is created ## ----------------------------------------------------------------------------- # getCPOClass(NULLCPO) # getCPOName(NULLCPO) # getCPOId(NULLCPO) # getHyperPars(NULLCPO) # getParamSet(NULLCPO) # getCPOAffect(NULLCPO) # getCPOOperatingType(NULLCPO) # operates neither on features nor on targets. # getCPOProperties(NULLCPO) # # applying NULLCPO leads to a retrafo() of NULLCPO, so it is its own CPOTrainedCPO # getCPOTrainedCPO(NULLCPO) # # NULLCPO has no effect on applyCPO and invert, so NULLCPO's capabilities are 0. # getCPOTrainedCapability(NULLCPO) # getCPOTrainedState(NULLCPO) ## ----------------------------------------------------------------------------- # nullToNullcpo(NULL) # nullcpoToNull(NULLCPO) # nullToNullcpo(10) # not changed # nullcpoToNull(10) # ditto ## ----------------------------------------------------------------------------- # cpo = cpoPca() # getCPOId(cpo) ## ----------------------------------------------------------------------------- # getParamSet(cpo) ## ----------------------------------------------------------------------------- # getParamSet(setCPOId(cpo, "my.id")) ## ----------------------------------------------------------------------------- # getParamSet(setCPOId(cpo, NULL)) ## ----error = TRUE------------------------------------------------------------- try({ # # cpo %>>% cpo }) ## ----------------------------------------------------------------------------- # cpo %>>% setCPOId(cpo, "two") ## ----------------------------------------------------------------------------- # getCPOProperties(cpoDummyEncode()) ## ----error = TRUE------------------------------------------------------------- try({ # train("classif.fnn", bc.task) # gives an error }) ## ----------------------------------------------------------------------------- # train(cpoDummyEncode(reference.cat = TRUE) %>>% makeLearner("classif.fnn"), bc.task) ## ----------------------------------------------------------------------------- # getLearnerProperties("classif.fnn") ## ----------------------------------------------------------------------------- # getLearnerProperties(cpoDummyEncode(TRUE) %>>% makeLearner("classif.fnn")) ## ----------------------------------------------------------------------------- # getCPOProperties(cpoApplyFun(export = "export.all"), get.internal = TRUE) ## ----------------------------------------------------------------------------- # # onlhy PCA columns that have '.Length' in their name # cpo = cpoPca(affect.pattern = ".Length") # getCPOAffect(cpo) ## ----------------------------------------------------------------------------- # triris = iris %>>% cpo # head(triris) ## ----------------------------------------------------------------------------- # !cpoScale() ## ----------------------------------------------------------------------------- # !cpoScale(export = "export.none") ## ----------------------------------------------------------------------------- # !cpoScale(scale = FALSE, export = "export.unset") ## ----results = "asis", echo = FALSE------------------------------------------- cat(knitr::knit_child("a_2_mlrCPO_core.Rmd", options = list(eval = FALSE)), sep = "\n")