From 45930701cc8671c756ba35db75a886d0c10c22b2 Mon Sep 17 00:00:00 2001 From: devrimcavusoglu Date: Mon, 20 Feb 2023 17:17:58 +0300 Subject: [PATCH 1/2] (wip) Notebook updates. --- examples/jury_evaluate.ipynb | 88 ++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 20 deletions(-) diff --git a/examples/jury_evaluate.ipynb b/examples/jury_evaluate.ipynb index d83d3e4..b01681e 100644 --- a/examples/jury_evaluate.ipynb +++ b/examples/jury_evaluate.ipynb @@ -4,14 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\"Open" + "\"Open" ] }, { @@ -20,7 +13,7 @@ "source": [ "## Packages (Colab)\n", "\n", - "To be able to use several metrics (e.g SacreBLEU, BERTScore, etc.), you need to install related package. When you try to use it without having those required packages, an exception will be thrown indicating that installation of spesific package is required. If you want to see score outputs for SacreBLEU and BERTScore in the experiments in this notebook, comment off related lines (those will be declared later with in line comments).\n", + "To be able to use several metrics (e.g. SacreBLEU, BERTScore, etc.), you need to install the related packages. When you try to use such a metric without having the required packages, an exception will be thrown indicating that installation of a specific package is required. If you want to see score outputs for SacreBLEU and BERTScore in the experiments in this notebook, uncomment the related lines (those will be pointed out later with inline comments).\n", "\n", "If you want to see/use those metrics, install required packages below with commenting off the code cell below." ] @@ -33,7 +26,7 @@ }, "outputs": [], "source": [ - "!pip install jury" + "!pip install -q jury" ] }, { @@ -70,14 +63,12 @@ "text": [ "/home/devrim/anaconda3/envs/jury/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. 
Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", - "2022-09-30 17:48:30.188018: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-02-20 11:38:18.371836: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2022-09-30 17:48:30.361067: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2022-09-30 17:48:30.361090: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2022-09-30 17:48:30.396709: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2022-09-30 17:48:30.939257: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2022-09-30 17:48:30.939339: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2022-09-30 17:48:30.939347: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT 
libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" + "2023-02-20 11:38:18.764660: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2023-02-20 11:38:19.502781: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-20 11:38:19.502882: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-20 11:38:19.502887: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" ] } ], @@ -90,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:22.531153Z", @@ -119,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:22.547167Z", @@ -145,7 +136,53 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Define Metrics\n", + "### Load Metric\n", + "\n", + "Here, we begin by loading and computing a single metric to evaluate MT predictions against references. You can use the `load_metric` function from jury, which lets you pass additional parameters to the specified metric." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "bleu_2 = load_metric(\n", + " \"bleu\", # metric name\n", + " resulting_name=\"bleu_2\", # name on output \n", + " compute_kwargs={\"max_order\": 2}, # kwargs to be used on computation\n", + " # **kwargs_passed_to_evaluate\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'my_predictions' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn [10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m bleu_2\u001b[38;5;241m.\u001b[39mcompute(predictions\u001b[38;5;241m=\u001b[39mmy_predictions, references\u001b[38;5;241m=\u001b[39mmy_references)\n", + "\u001b[0;31mNameError\u001b[0m: name 'my_predictions' is not defined" + ] + } + ], + "source": [ + "bleu_2.compute(predictions=my_predictions, references=my_references)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define Multiple Metrics\n", "\n", "Here define your metrics used to evaluate MT prediction and references. You can either use load function from jury where you can pass additional parameters to specified metric, or specify as string, which will use default parameters.\n", "\n", @@ -154,6 +191,17 @@ "[Here](https://huggingface.co/transformers/pretrained_models.html), you can observe model sizes, parameter counts, etc." 
] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "MY_METRICS = [\"bleu\", \"meteor\", \"rouge\"]\n", + "scorer = Jury(metrics=MY_METRICS, run_concurrent=True)\n", + "scores = scorer(predictions=my_predictions, references=my_references)" + ] + }, { "cell_type": "code", "execution_count": 4, From d19abdbca2a6a3fcbd38b2b3b2ee9fb707d5a6d9 Mon Sep 17 00:00:00 2001 From: devrimcavusoglu Date: Tue, 21 Feb 2023 17:15:07 +0300 Subject: [PATCH 2/2] Notebook updated. --- examples/jury_evaluate.ipynb | 157 ++++++----------------------------- 1 file changed, 24 insertions(+), 133 deletions(-) diff --git a/examples/jury_evaluate.ipynb b/examples/jury_evaluate.ipynb index b01681e..07c1519 100644 --- a/examples/jury_evaluate.ipynb +++ b/examples/jury_evaluate.ipynb @@ -49,29 +49,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:22.515146Z", "start_time": "2021-10-02T19:59:20.716835Z" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/devrim/anaconda3/envs/jury/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "2023-02-20 11:38:18.371836: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-20 11:38:18.764660: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2023-02-20 11:38:19.502781: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-20 11:38:19.502882: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-20 11:38:19.502887: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import json # Just for pretty printing the resulting dict.\n", @@ -81,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:22.531153Z", @@ -110,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:22.547167Z", @@ -143,39 +128,30 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bleu_2 = load_metric(\n", - " \"bleu\", # metric name\n", - " resulting_name=\"bleu_2\", # name on output \n", - " compute_kwargs={\"max_order\": 2}, # kwargs to be used on computation\n", + " \"bleu\",\n", + " resulting_name=\"bleu_2\",\n", + " compute_kwargs={\"max_order\": 2},\n", " # **kwargs_passed_to_evaluate\n", ")" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "scrolled": true }, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'my_predictions' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn [10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m bleu_2\u001b[38;5;241m.\u001b[39mcompute(predictions\u001b[38;5;241m=\u001b[39mmy_predictions, references\u001b[38;5;241m=\u001b[39mmy_references)\n", - "\u001b[0;31mNameError\u001b[0m: name 'my_predictions' is not defined" - ] - } - ], + "outputs": [], "source": [ - "bleu_2.compute(predictions=my_predictions, references=my_references)" + "bleu_2.compute(\n", + " predictions=mt_predictions, \n", + " references=mt_references\n", + ")" ] 
}, { @@ -194,17 +170,6 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "MY_METRICS = [\"bleu\", \"meteor\", \"rouge\"]\n", - "scorer = Jury(metrics=MY_METRICS, run_concurrent=True)\n", - "scores = scorer(predictions=my_predictions, references=my_references)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:22.562180Z", @@ -235,7 +200,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:26.664594Z", @@ -252,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:26.679608Z", @@ -260,48 +225,7 @@ }, "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"total_items\": 2,\n", - " \"empty_items\": 0,\n", - " \"bleu_1\": {\n", - " \"score\": 0.8823529411764706,\n", - " \"precisions\": [\n", - " 0.8823529411764706\n", - " ],\n", - " \"brevity_penalty\": 1.0,\n", - " \"length_ratio\": 1.0,\n", - " \"translation_length\": 11,\n", - " \"reference_length\": 11\n", - " },\n", - " \"bleu_2\": {\n", - " \"score\": 0.7531446678801508,\n", - " \"precisions\": [\n", - " 0.8823529411764706,\n", - " 0.6428571428571429\n", - " ],\n", - " \"brevity_penalty\": 1.0,\n", - " \"length_ratio\": 1.0,\n", - " \"translation_length\": 11,\n", - " \"reference_length\": 11\n", - " },\n", - " \"meteor\": {\n", - " \"score\": 0.727184593644221\n", - " },\n", - " \"rouge\": {\n", - " \"rouge1\": 0.7783882783882783,\n", - " \"rouge2\": 0.5925324675324675,\n", - " \"rougeL\": 0.7426739926739926,\n", - " \"rougeLsum\": 0.7426739926739926\n", - " }\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "# Display results\n", "print(json.dumps(scores, indent=4))" @@ -318,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 7, + 
"execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:29.552689Z", @@ -337,7 +261,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:30.130953Z", @@ -345,22 +269,7 @@ }, "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"total_items\": 3,\n", - " \"empty_items\": 0,\n", - " \"squad\": {\n", - " \"exact_match\": 0.33333333333333337,\n", - " \"f1\": 0.8222222222222223\n", - " }\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "qa_jury = Jury(metrics=QA_METRICS, run_concurrent=False)\n", "scores = qa_jury(predictions=qa_predictions, references=qa_references)\n", @@ -378,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T20:17:28.152162Z", @@ -470,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T20:17:28.353117Z", @@ -487,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T20:17:28.583593Z", @@ -495,25 +404,7 @@ }, "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"total_items\": 3,\n", - " \"empty_items\": 0,\n", - " \"squad\": {\n", - " \"exact_match\": 0.33333333333333337,\n", - " \"f1\": 0.8222222222222223\n", - " },\n", - " \"word_match\": {\n", - " \"score\": 0.7222222222222222\n", - " }\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "qa_jury = Jury(metrics=QA_METRICS, run_concurrent=False)\n", "scores = qa_jury(predictions=qa_predictions, references=qa_references)\n",