feat: optuna tune script PoC (#1411)

bagxi · web-flow · commit 7bd9eb5c6536 · 2022-04-17T13:33:02.000+03:00
* feat: optuna tune script PoC

* fix: workaround for latest version of Jinja2

* fix: workaround for latest version of click

* fix: workaround for minimal tests added

* fix: workaround for new version of "accelerate" added

* tests: test_tune minimal updated

* docs: changelog updated

* tests: "check_config_api.sh" workflow added
diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml
@@ -83,7 +83,7 @@ jobs:
         env:
           REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
-          LINE_LENGTH=99 catalyst-codestyle-flake8 . | reviewdog -f=pep8 -reporter=github-pr-review
+          LINE_LENGTH=89 catalyst-codestyle-flake8 . | reviewdog -f=pep8 -reporter=github-pr-review
 
   build:
     name: codestyle-and-docs
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,7 +8,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Added
 
--
+- `catalyst-tune` console script for Optuna-based hyperparameter tuning with Config API added [#1411](https://github.com/catalyst-team/catalyst/pull/1411)
 
 ### Changed
 
diff --git a/bin/workflows/check_config_api.sh b/bin/workflows/check_config_api.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+# Cause the script to exit if a single command fails
+set -eo pipefail -v
+
+DEVICE=${DEVICE:="cpu"}
+
+pip install -e . --quiet --no-deps --upgrade-strategy only-if-needed
+
+################################  pipeline 00  ################################
+# checking `catalyst-run` console script entry point
+
+PYTHONPATH="${PYTHONPATH}:." catalyst-run \
+  -C "tests/pipelines/configs/test_mnist.yml" \
+    "tests/pipelines/configs/engine_${DEVICE}.yml"
+
+rm -rf tests/logs
+
+################################  pipeline 01  ################################
+# checking `catalyst-tune` console script entry point
+
+pip install -r requirements/requirements-optuna.txt --quiet \
+  --find-links https://download.pytorch.org/whl/cpu/torch_stable.html \
+  --upgrade-strategy only-if-needed
+
+PYTHONPATH="${PYTHONPATH}:." catalyst-tune \
+  -C "tests/contrib/scripts/test_tune.yml" \
+    "tests/pipelines/configs/engine_${DEVICE}.yml" \
+  --n-trials 2
+
+rm -rf tests/logs
diff --git a/catalyst/contrib/scripts/run.py b/catalyst/contrib/scripts/run.py
@@ -1,12 +1,13 @@
-from typing import Iterable
+#!/usr/bin/env python
+from typing import Dict, Iterable
 import argparse
 import logging
 
 from catalyst import utils
 from catalyst.registry import REGISTRY
 
 
-def parse_args():
+def parse_args(args: Iterable = None, namespace: argparse.Namespace = None):
     """Parses the command line arguments and returns arguments and config."""
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -29,18 +30,14 @@ def parse_args():
     )
     utils.boolean_flag(parser, "benchmark", default=None, help="Use CuDNN benchmark")
 
-    args, unknown_args = parser.parse_known_args()
+    args, unknown_args = parser.parse_known_args(args=args, namespace=namespace)
     return vars(args), unknown_args
 
 
-def run_from_config(
-    configs: Iterable[str],
-    deterministic: bool = None,
-    benchmark: bool = None,
-) -> None:
-    """Creates Runner from YAML configs and runs experiment."""
-    logger = logging.getLogger(__name__)
-
+def process_configs(
+    configs: Iterable[str], deterministic: bool = None, benchmark: bool = None
+) -> Dict:
+    """Merges YAML configs and prepares env."""
     # there is no way to set deterministic/benchmark flags with a runner,
     # so do it manually
     utils.prepare_cudnn(deterministic, benchmark)
@@ -49,9 +46,13 @@ def run_from_config(
     for config_path in configs:
         config_part = utils.load_config(config_path, ordered=True)
         config = utils.merge_dicts(config, config_part)
-    # config_copy = copy.deepcopy(config)
 
-    experiment_params = REGISTRY.get_from_params(**config)
+    return config
+
+
+def run_from_params(experiment_params: Dict) -> None:
+    """Runs multi-stage experiment."""
+    logger = logging.getLogger(__name__)
 
     runner = experiment_params["runner"]
     for stage_params in experiment_params["run"]:
@@ -62,15 +63,21 @@ def run_from_config(
         if result is not None:
             logger.info(f"{name}:\n{result}")
 
-    # TODO: check if needed
-    # logdir = getattr(runner, "logdir", getattr(runner, "_logdir"), None)
-    # if logdir and utils.get_rank() <= 0:
-    #     utils.dump_environment(logdir=logdir, config=config_copy, configs_path=configs)
+
+def run_from_config(
+    configs: Iterable[str],
+    deterministic: bool = None,
+    benchmark: bool = None,
+) -> None:
+    """Creates Runner from YAML configs and runs experiment."""
+    config = process_configs(configs, deterministic=deterministic, benchmark=benchmark)
+    experiment_params = REGISTRY.get_from_params(**config)
+    run_from_params(experiment_params)
 
 
 def main():
     """Runs the ``catalyst-run`` script."""
-    kwargs, unknown_args = parse_args()
+    kwargs, _ = parse_args()
     run_from_config(**kwargs)
 
 
diff --git a/catalyst/contrib/scripts/tune.py b/catalyst/contrib/scripts/tune.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+"""Config API and Optuna integration for AutoML hyperparameters tuning."""
+from typing import Iterable
+import argparse
+import copy
+
+import optuna
+
+from catalyst import utils
+from catalyst.contrib.scripts import run
+from catalyst.registry import REGISTRY
+from hydra_slayer import functional as F
+
+
+def parse_args(args: Iterable = None, namespace: argparse.Namespace = None):
+    """Parses the tuning CLI arguments and returns them with the unknown ones."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--n-trials", type=int, default=None)
+    parser.add_argument("--timeout", type=int, default=None)
+    parser.add_argument("--n-jobs", type=int, default=1)
+    utils.boolean_flag(parser, "gc-after-trial", default=False)
+    utils.boolean_flag(parser, "show-progress-bar", default=False)
+
+    args, unknown_args = parser.parse_known_args(args=args, namespace=namespace)
+    return vars(args), unknown_args
+
+
+def main():
+    """Runs the ``catalyst-tune`` script."""
+    kwargs_run, unknown_args = run.parse_args()
+    kwargs_tune, _ = parse_args(args=unknown_args)
+
+    config_full = run.process_configs(**kwargs_run)
+    config = copy.copy(config_full)
+    config_study = config.pop("study")
+
+    # optuna objective
+    def objective(trial: optuna.trial):
+        # workaround for `REGISTRY.get_from_params`: inject current `trial` var
+        experiment_params, _ = F._recursive_get_from_params(
+            factory_key=REGISTRY.name_key,
+            get_factory_func=REGISTRY.get,
+            params=config,
+            shared_params={},
+            var_key=REGISTRY.var_key,
+            attrs_delimiter=REGISTRY.attrs_delimiter,
+            vars_dict={**REGISTRY._vars_dict, "trial": trial},
+        )
+        runner = experiment_params["runner"]
+        runner._trial = trial
+
+        run.run_from_params(experiment_params)
+        score = runner.epoch_metrics[runner._valid_loader][runner._valid_metric]
+
+        return score
+
+    study = REGISTRY.get_from_params(**config_study)
+    study.optimize(objective, **kwargs_tune)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/notebooks/XLA.ipynb b/examples/notebooks/XLA.ipynb
@@ -704,6 +704,36 @@
     "runner = CustomRunner(logdir)\n",
     "runner.run()"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Integrations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! cd catalyst && pip install -e ."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! cd catalyst && DEVICE=\"engine_core\" bash bin/workflows/check_config_api.sh"
+   ]
   }
  ],
  "metadata": {
diff --git a/examples/notebooks/XLA_ddp.ipynb b/examples/notebooks/XLA_ddp.ipynb
@@ -1047,6 +1047,36 @@
     "world_size: int = 8\n",
     "xmp.spawn(train_fn, args=(world_size,), nprocs=world_size, start_method=\"fork\")"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Integrations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! cd catalyst && pip install -e ."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! cd catalyst && DEVICE=\"ddp_xla\" bash bin/workflows/check_config_api.sh"
+   ]
   }
  ],
  "metadata": {
diff --git a/examples/notebooks/colab_ci_cd.ipynb b/examples/notebooks/colab_ci_cd.ipynb
@@ -145,7 +145,7 @@
       },
       "outputs": [],
       "source": [
-        "! cd catalyst && catalyst-make-codestyle && catalyst-check-codestyle > codestyle.txt"
+        "! cd catalyst && catalyst-make-codestyle -l 89 && catalyst-check-codestyle -l 89 > codestyle.txt"
       ]
     },
     {
@@ -236,6 +236,16 @@
         "! cd catalyst && pip install -e ."
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "! cd catalyst && DEVICE=\"cpu\" bash bin/workflows/check_config_api.sh\n",
+        "! cd catalyst && DEVICE=\"gpu\" bash bin/workflows/check_config_api.sh"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": null,
diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt
@@ -1,13 +1,15 @@
 pytest==5.3.1
 sphinx==2.2.1
+Jinja2<=3.0.3
 docutils==0.17.1
 # sphinx==4.2.0
 # git+https://github.com/bitprophet/releases/#egg=releases
 # git+https://github.com/readthedocs/sphinx_rtd_theme
 mock==3.0.5
 catalyst-codestyle==21.09.2
 black==21.8b0
+click<=8.0.4
 catalyst-sphinx-theme==1.2.0
 tomlkit==0.7.2
 pre-commit==2.13.0
-path
+path
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
@@ -3,7 +3,7 @@ numpy>=1.18
 torch>=1.4.0
 
 # hardware backend
-accelerate
+accelerate>=0.5.1
 
 # registry
 hydra-slayer>=0.4.0
diff --git a/setup.py b/setup.py
@@ -100,6 +100,7 @@ def load_version():
         "console_scripts": [
             "catalyst-contrib=catalyst.contrib.__main__:main",
             "catalyst-run=catalyst.contrib.scripts.run:main",
+            "catalyst-tune=catalyst.contrib.scripts.tune:main",
         ],
     },
     scripts=[
diff --git a/tests/contrib/scripts/test_tune.py b/tests/contrib/scripts/test_tune.py
diff --git a/tests/contrib/scripts/test_tune.yml b/tests/contrib/scripts/test_tune.yml
diff --git a/tests/pipelines/configs/engine_core.yml b/tests/pipelines/configs/engine_core.yml
diff --git a/tests/pipelines/configs/engine_ddp_xla.yml b/tests/pipelines/configs/engine_ddp_xla.yml