## ----results = "asis", echo = FALSE-------------------------------------------
# Hidden preamble chunk of the vignette. It
#   * configures knitr output,
#   * installs a knitr `document` hook that strips trailing blanks from the
#     tangled .R file,
#   * prints the table of contents that links all vignettes of the directory,
#   * defines printToc() (per-document table of contents), and
#   * wraps the print methods found in the mlr namespace so that lines
#     containing timing information are not printed.

# output format should be of the form
#> output
#> output
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# initialize: load library, make everything deterministic
library("mlrCPO")
set.seed(123)

# get the path of the parent document
# path = names(knitr::opts_knit$get("encoding"))[1]
base = knitr::opts_knit$get("output.dir")
# The name of the document being knitted is taken from the `input` variable in
# the frame of the outermost knitr::knit() / knit() call on the call stack.
callnames = vapply(sys.calls(), function(x) as.character(x)[1], character(1))
knitframes = grep("^knitr::knit$|^knit$", callnames)
if (!length(knitframes)) {
  # without this check min() would return Inf and sys.frame() would fail with
  # an unrelated-looking message
  stop("This chunk must be evaluated from within knitr::knit().")
}
file = sys.frame(min(knitframes))$input
file = basename(file)
path = file.path(base, file)
rpath = gsub("\\.[^.]*$", ".R", path)

# strip whitespace from lines in tangle (R file) output for lintr
knitr::knit_hooks$set(document = function(x) {
  if (file_test("-f", rpath)) {
    lines = readLines(rpath)
    lines = gsub(" *(\n|$)", "\\1", lines)
    cat(lines, file = rpath, sep = "\n", append = FALSE)
  }
  x  # the document itself is passed through unchanged
})

#############################
# do the trans-vignette ToC #
#############################

fullfile = file

allfiles = list.files(path = base, pattern = ".*\\.Rmd$")

stopifnot(file %in% allfiles)

# collect information (title, url, main / compact) for each file in vignette dir
# Files named z_<slot>_terse.Rmd are filed as the "compact" version of <slot>,
# all other files (a_<slot>.Rmd) as its "main" version.
fileinfolist = list()
for (cf in allfiles) {
  if (grepl("^z_", cf)) {
    infoslot = gsub("^z_", "", cf)
    infoslot = gsub("_terse\\.Rmd$", "", infoslot)
    subslot = "compact"
  } else {
    infoslot = gsub("^a_", "", cf)
    infoslot = gsub("\\.Rmd$", "", infoslot)
    subslot = "main"
  }

  # the title is the token that follows the first "title:" token of the file
  tokens = scan(file.path(base, cf), what = "character", quiet = TRUE)
  pos = match("title:", tokens)
  if (is.na(pos)) {
    stop(sprintf("parsing error: %s", cf))
  }
  infolist = list(title = tokens[pos + 1], url = cf, iscurrent = cf == file)

  applist = list(infolist)
  names(applist) = subslot
  fileinfolist[[infoslot]] = c(fileinfolist[[infoslot]], applist)
}

# helper function that creates a link for all files except the current one
# info:  list with elements `url` and `iscurrent`, as built above
# title: text to show
# Returns `title` itself for the current file, a markdown link to the rendered
# .html file otherwise.
linkify = function(info, title) {
  if (info$iscurrent) {
    title
  } else {
    sprintf("[%s](%s)", title, gsub("\\.Rmd$", ".html", info$url))
  }
}

# output ToC
slots = sort(names(fileinfolist))
for (idx in seq_along(slots)) {
  entry = fileinfolist[[slots[idx]]]
  if (is.null(entry$main)) {
    stop(sprintf("Compact vignette %s has no corresponding main vignette.",
      entry$compact$url))
  }
  if (!is.null(entry$compact)) {
    # the title the compact version is required to have (after its "z " prefix
    # is removed); also used in the error message so that the message shows
    # the value that was actually compared
    expected = paste(sub("[0-9]\\. ", "", entry$main$title), "(No Output)")
    if (expected != sub("^z ", "", entry$compact$title)) {
      stop(sprintf("File %s and its compact version %s have incompatible titles\nThe compact version must be paste(main_title, \"(No Output)\"). Is: '%s', expected: '%s'",
        entry$main$url, entry$compact$url, entry$compact$title, expected))
    }
    tocline = sprintf("%s (%s)", linkify(entry$main, entry$main$title),
      linkify(entry$compact, "compact version"))
  } else {
    tocline = linkify(entry$main, entry$main$title)
  }
  cat(sprintf("%s. %s\n", idx, tocline))
  # isTRUE() keeps this working when there is no compact version, in which
  # case entry$compact$iscurrent is NULL and `||` would fail
  if (isTRUE(entry$main$iscurrent) || isTRUE(entry$compact$iscurrent)) {
    fullfile = entry$main$url
  }
}

fullpath = file.path(base, fullfile)

#############################
# Optional Document TOC     #
#############################

# print everything up to level `print.level`.
# level is the number of '#' prefixes. The lowest level is usually 2.
#
# Reads the headlines from the file `fullpath` (the main version of the
# current vignette), ignoring everything inside ```-fenced blocks, and prints
# an HTML table of contents to the output. Prints nothing if there is no
# headline of level <= print.level. Stops with an error if a headline is
# above the level of the first headline, or if a level is skipped.
#
# NOTE(review): the HTML string literals of this function were lost from the
# reviewed copy of the file. The heading line could be recovered from the
# remaining fragments; the nested-list output below (the <ul>/<li>/<a> tags
# and the closing tags) is a reconstruction and must be compared against the
# original vignette source before merging.
printToc = function(print.level = 3) {
  owncontent = readLines(fullpath)
  tripletic = grepl("^```", owncontent)
  owncontent = owncontent[cumsum(tripletic) %% 2 == 0]  # exclude ```-delimited code
  headlines = grep("^#+ +", owncontent, value = TRUE)
  headlevels = nchar(gsub(" .*", "", headlines))
  headlines = gsub("^[#]+ +", "", headlines)
  # anchor names: lower case, only letters, '-', '.' kept, blanks become '-'
  # (presumably this mirrors the ids the renderer gives to headlines -- confirm)
  links = gsub("[^-a-z. ]", "", tolower(headlines))
  links = gsub(" +", "-", links)
  links = gsub("-$", "", links)
  if (!any(headlevels <= print.level)) {
    return(invisible(NULL))
  }
  cat("<h", headlevels[1], ">Table of Contents</h", headlevels[1], ">\n<div id=\"TOC\">\n", sep = "")
  # start one level above the first headline, so the first entry opens a list
  lastlevel = headlevels[1] - 1
  for (idx in seq_along(headlines)) {
    line = headlines[idx]
    level = headlevels[idx]
    link = links[idx]
    if (level > print.level) {
      next
    }
    if (level < headlevels[1]) {
      stop("First headline level must be the lowest one used, but '", line, "' is lower.")
    }
    lvldiff = level - lastlevel
    if (lvldiff > 1) {
      stop("Cannot jump headline levels. Error on: ", line)
    }
    if (lvldiff > 0) {
      # higher level -> open a new (nested) list
      cat("<ul>\n")
    } else {
      # same or lower level -> close the previous item, and one nested list
      # (together with the item containing it) per level that is left
      cat("</li>\n", rep("</ul></li>\n", -lvldiff), sep = "")
    }
    cat(sprintf("<li><a href=\"#%s\">%s</a>", link, line))
    lastlevel = level
  }
  # close the last item and every list that is still open
  cat("</li>\n", rep("</ul></li>\n", lastlevel - headlevels[1]), sep = "")
  cat("</ul>\n</div>\n")
}

#############################
# Some output settings      #
#############################

options(width = 80)

# Wrap a print function so that output lines containing "time: <number>" are
# dropped; everything else is printed as `ofunc` would print it.
# ofunc: the function to wrap, called as ofunc(x, ...)
# Returns a function(x, ...) that invisibly returns the value of `ofunc`.
replaceprint = function(ofunc) {
  force(ofunc)  # fix the wrapped function now; the caller reassigns it in a loop
  function(x, ...) {
    cu = capture.output({ret = ofunc(x, ...)})
    cu = grep("time: [-+e0-9.]{1,6}", cu, value = TRUE, invert = TRUE)
    # nothing to print if `ofunc` printed nothing or every line was filtered;
    # without this guard the `if` below fails on a zero-length condition
    if (length(cu)) {
      cat(paste(cu, collapse = "\n"))
      if (!grepl("\n$", tail(cu, 1))) {
        cat("\n")
      }
    }
    # invisible, as is conventional for print methods; a visible value would
    # be printed a second time after an explicit top-level print() call
    invisible(ret)
  }
}

# shadow every function of the mlr namespace that has "print." in its name
# with its wrapped version
for (pfunc in grep("print\\.", ls(asNamespace("mlr")), value = TRUE)) {
  ofunc = get(pfunc, asNamespace("mlr"))
  assign(pfunc, replaceprint(ofunc))
}

## ----eval = TRUE, echo = FALSE, results = 'asis'------------------------------
printToc(4)

## -----------------------------------------------------------------------------
getCPOClass(cpoPca)
getCPOClass(cpoPca())
getCPOClass(pid.task %>|% cpoPca())
getCPOClass(inverter(bh.task %>>% cpoLogTrafoRegr()))
getCPOClass(NULLCPO)

## -----------------------------------------------------------------------------
print(cpoAsNumeric)  # example CPOConstructor
print(cpoAsNumeric, verbose = TRUE)  # alternative: !cpoAsNumeric
class(cpoAsNumeric)
getCPOName(cpoPca)  # same as getCPOName() of the *constructed* CPO
getCPOClass(cpoPca)

## -----------------------------------------------------------------------------
names(formals(cpoPca))

## -----------------------------------------------------------------------------
(cpo = cpoScale())  # construct CPO with default Hyperparameter values
print(cpo, verbose = TRUE)  # detailed printing.
# Alternative: !cpo
class(cpo)  # CPOs that are not compound are "CPOPrimitive"
getCPOClass(cpo)

## -----------------------------------------------------------------------------
# accessors of a constructed CPO
getParamSet(cpo)
getHyperPars(cpo)
setHyperPars(cpo, scale.center = FALSE)
getCPOId(cpo)
setCPOId(cpo, "MYID")
getCPOName(cpo)
getCPOAffect(cpo)  # empty, since no affect set
getCPOAffect(cpoPca(affect.pattern = "Width$"))
getCPOConstructor(cpo)  # the constructor used to create the CPO
getCPOProperties(cpo)  # see properties explanation below
getCPOPredictType(cpo)
getCPOClass(cpo)
getCPOOperatingType(cpo)  # Operating on feature, target, retrafoless?

## -----------------------------------------------------------------------------
getCPOPredictType(cpoResponseFromSE())
getCPOOperatingType(cpoResponseFromSE())
getCPOOperatingType(cpoSample())

## -----------------------------------------------------------------------------
identicalCPO(cpoScale(scale = TRUE), cpoScale(scale = FALSE))
identicalCPO(cpoScale(), cpoPca())

## -----------------------------------------------------------------------------
# applying a CPO: with the %>>% operator, or with applyCPO()
head(iris) %>>% cpoPca()
task = applyCPO(cpoPca(), iris.task)
head(getTaskData(task))

## -----------------------------------------------------------------------------
scale = cpoScale()
pca = cpoPca()

## -----------------------------------------------------------------------------
compound = scale %>>% pca
composeCPO(scale, pca)  # same
class(compound)
!compound
getCPOName(compound)
getHyperPars(compound)
setHyperPars(compound, scale.center = TRUE, pca.center = FALSE)

## ----error = TRUE-------------------------------------------------------------
try({
  getCPOId(compound)  # error: no ID for compound CPOs
  getCPOAffect(compound)  # error: no affect for compound CPOs
})

## -----------------------------------------------------------------------------
getCPOOperatingType(NULLCPO)
getCPOOperatingType(cpoScale())
getCPOOperatingType(cpoScale() %>>% cpoLogTrafoRegr() %>>% cpoSample())

## -----------------------------------------------------------------------------
as.list(compound)
pipeCPO(as.list(compound))  # pipeCPO: (list of CPO) -> CPO
pipeCPO(list())

## -----------------------------------------------------------------------------
lrn = makeLearner("classif.logreg")
(cpolrn = cpo %>>% lrn)  # the new learner has the CPO hyperparameters
attachCPO(compound, lrn)  # attaching compound CPO

## -----------------------------------------------------------------------------
class(lrn)

## -----------------------------------------------------------------------------
# CPO attached to a learner: train on the first 300 rows, predict on the rest
lrn = cpoLogTrafoRegr() %>>% makeLearner("regr.lm")
model = train(lrn, subsetTask(bh.task, 1:300))
predict(model, subsetTask(bh.task, 301:500))

## -----------------------------------------------------------------------------
# the same steps done by hand, using retrafo() and inverter()
trafo = subsetTask(bh.task, 1:300) %>>% cpoLogTrafoRegr()
model = train("regr.lm", trafo)

newdata = subsetTask(bh.task, 301:500) %>>% retrafo(trafo)
pred = predict(model, newdata)
invert(inverter(newdata), pred)

## -----------------------------------------------------------------------------
getLearnerCPO(cpolrn)  # the CPO
getLearnerBare(cpolrn)  # the Learner

## -----------------------------------------------------------------------------
transformed = iris %>>% cpoScale()
head(transformed)
(ret = retrafo(transformed))

## -----------------------------------------------------------------------------
head(getTaskTargets(bh.task))
transformed = bh.task %>>% cpoLogTrafoRegr()
head(getTaskTargets(transformed))
(inv = inverter(transformed))
head(invert(inv, getTaskTargets(transformed)))

## -----------------------------------------------------------------------------
attributes(transformed)

## -----------------------------------------------------------------------------
bh2 = bh.task
retrafo(bh2) = ret
attributes(bh2)

## -----------------------------------------------------------------------------
retrafo(bh2) = NULLCPO
# equivalent:
# retrafo(bh2) = NULL
attributes(bh2)

## -----------------------------------------------------------------------------
# clearRI returns the object without retrafo or inverter attributes
bh3 = clearRI(transformed)
attributes(bh3)

## -----------------------------------------------------------------------------
# accessors applied to the retrafo (`ret`) and inverter (`inv`) objects
getCPOName(ret)
getParamSet(ret)
getHyperPars(ret)
getCPOProperties(ret)
getCPOPredictType(ret)
getCPOOperatingType(ret)  # Operating on feature, target, both?
getCPOOperatingType(inv)

## -----------------------------------------------------------------------------
getCPOTrainedCapability(ret)
getCPOTrainedCapability(inv)
getCPOTrainedCapability(NULLCPO)

## -----------------------------------------------------------------------------
getCPOClass(ret)
getCPOClass(inv)

## -----------------------------------------------------------------------------
getCPOTrainedCPO(ret)
getCPOConstructor(ret)

## -----------------------------------------------------------------------------
# read the state of a retrafo, modify it, and build a new retrafo from it
(state = getCPOTrainedState(retrafo(iris %>>% cpoScale())))
state$control$center[1] = 1000  # will now subtract 1000 from the first column
new.retrafo = makeCPOTrainedFromState(cpoScale, state)
head(iris %>>% new.retrafo)

## -----------------------------------------------------------------------------
data = head(iris) %>>% cpoPca()
retrafo(data)
data2 = data %>>% cpoScale()

## -----------------------------------------------------------------------------
retrafo(data2)

## -----------------------------------------------------------------------------
data = clearRI(data)
data2 = data %>>% cpoScale()
retrafo(data2)

## -----------------------------------------------------------------------------
retrafo(data) = NULL
inverter(data) = NULL
data3 = data %>>% cpoScale()
retrafo(data3)

## -----------------------------------------------------------------------------
compound.retrafo = retrafo(head(iris) %>>% compound)
compound.retrafo

## -----------------------------------------------------------------------------
(retrafolist = as.list(compound.retrafo))

## -----------------------------------------------------------------------------
retrafolist[[1]] %>>% retrafolist[[2]]
pipeCPO(retrafolist)

## -----------------------------------------------------------------------------
transformed = iris %>>% cpoScale()
head(iris) %>>% retrafo(transformed)

## -----------------------------------------------------------------------------
head(transformed)

## ----eval = FALSE-------------------------------------------------------------
# applyCPO(retrafo(transformed), head(iris))
# predict(retrafo(transformed), head(iris))

## -----------------------------------------------------------------------------
transformed = bh.task %>>% cpoLogTrafoRegr()
prediction = predict(train("regr.lm", transformed), transformed)
inv = inverter(transformed)
invert(inv, prediction)

## -----------------------------------------------------------------------------
ret = retrafo(transformed)
invert(ret, prediction)

## -----------------------------------------------------------------------------
pipeCPO(list())
as.list(NULLCPO)  # the inverse of pipeCPO
retrafo(bh.task)
# cpoPca operates on features, not on the target (i.e. it is not a TOCPO),
# so no inverter is created
inverter(bh.task %>>% cpoPca())

## -----------------------------------------------------------------------------
getCPOClass(NULLCPO)
getCPOName(NULLCPO)
getCPOId(NULLCPO)
getHyperPars(NULLCPO)
getParamSet(NULLCPO)
getCPOAffect(NULLCPO)
getCPOOperatingType(NULLCPO)  # operates neither on features nor on targets.
getCPOProperties(NULLCPO)
# applying NULLCPO leads to a retrafo() of NULLCPO, so it is its own CPOTrainedCPO
getCPOTrainedCPO(NULLCPO)
# NULLCPO has no effect on applyCPO and invert, so NULLCPO's capabilities are 0.
getCPOTrainedCapability(NULLCPO)
getCPOTrainedState(NULLCPO)

## -----------------------------------------------------------------------------
nullToNullcpo(NULL)
nullcpoToNull(NULLCPO)
nullToNullcpo(10)  # not changed
nullcpoToNull(10)  # ditto

## -----------------------------------------------------------------------------
cpo = cpoPca()
getCPOId(cpo)

## -----------------------------------------------------------------------------
getParamSet(cpo)

## -----------------------------------------------------------------------------
getParamSet(setCPOId(cpo, "my.id"))

## -----------------------------------------------------------------------------
getParamSet(setCPOId(cpo, NULL))

## ----error = TRUE-------------------------------------------------------------
try({
  cpo %>>% cpo
})

## -----------------------------------------------------------------------------
cpo %>>% setCPOId(cpo, "two")

## -----------------------------------------------------------------------------
getCPOProperties(cpoDummyEncode())

## ----error = TRUE-------------------------------------------------------------
try({
  train("classif.fnn", bc.task)  # gives an error
})

## -----------------------------------------------------------------------------
train(cpoDummyEncode(reference.cat = TRUE) %>>% makeLearner("classif.fnn"), bc.task)

## -----------------------------------------------------------------------------
getLearnerProperties("classif.fnn")

## -----------------------------------------------------------------------------
getLearnerProperties(cpoDummyEncode(TRUE) %>>% makeLearner("classif.fnn"))

## -----------------------------------------------------------------------------
getCPOProperties(cpoApplyFun(export = "export.all"), get.internal = TRUE)

## -----------------------------------------------------------------------------
# only PCA columns that have '.Length' in their name
cpo = cpoPca(affect.pattern = ".Length")
getCPOAffect(cpo)

## -----------------------------------------------------------------------------
triris = iris %>>% cpo
head(triris)

## -----------------------------------------------------------------------------
!cpoScale()

## -----------------------------------------------------------------------------
!cpoScale(export = "export.none")

## -----------------------------------------------------------------------------
!cpoScale(scale = FALSE, export = "export.unset")