diff --git a/examples/jury_evaluate.ipynb b/examples/jury_evaluate.ipynb index d83d3e4..07c1519 100644 --- a/examples/jury_evaluate.ipynb +++ b/examples/jury_evaluate.ipynb @@ -4,14 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\"Open" + "\"Open" ] }, { @@ -20,7 +13,7 @@ "source": [ "## Packages (Colab)\n", "\n", - "To be able to use several metrics (e.g SacreBLEU, BERTScore, etc.), you need to install related package. When you try to use it without having those required packages, an exception will be thrown indicating that installation of spesific package is required. If you want to see score outputs for SacreBLEU and BERTScore in the experiments in this notebook, comment off related lines (those will be declared later with in line comments).\n", + "To be able to use several metrics (e.g. SacreBLEU, BERTScore, etc.), you need to install the related packages. When you try to use such a metric without having its required packages, an exception will be thrown indicating that installation of a specific package is required. If you want to see score outputs for SacreBLEU and BERTScore in the experiments in this notebook, uncomment the related lines (they are marked later with inline comments).\n", "\n", "If you want to see/use those metrics, install required packages below with commenting off the code cell below." ] @@ -33,7 +26,7 @@ }, "outputs": [], "source": [ - "!pip install jury" + "!pip install -q jury" ] }, { @@ -56,31 +49,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:22.515146Z", "start_time": "2021-10-02T19:59:20.716835Z" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/devrim/anaconda3/envs/jury/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "2022-09-30 17:48:30.188018: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2022-09-30 17:48:30.361067: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2022-09-30 17:48:30.361090: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2022-09-30 17:48:30.396709: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2022-09-30 17:48:30.939257: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2022-09-30 17:48:30.939339: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2022-09-30 17:48:30.939347: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import json # Just for pretty printing the resulting dict.\n", @@ -90,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:22.531153Z", @@ -119,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:22.547167Z", @@ -145,7 +121,44 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Define Metrics\n", + "### Load Metric\n", + "\n", + "Here, we begin by loading and computing a single metric used to evaluate MT predictions and references. You can use the `load_metric` function from jury, which lets you pass additional parameters to the specified metric." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bleu_2 = load_metric(\n", + " \"bleu\",\n", + " resulting_name=\"bleu_2\",\n", + " compute_kwargs={\"max_order\": 2},\n", + " # **kwargs_passed_to_evaluate\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "bleu_2.compute(\n", + " predictions=mt_predictions, \n", + " references=mt_references\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define Multiple Metrics\n", "\n", "Here define your metrics used to evaluate MT prediction and references. 
You can either use load function from jury where you can pass additional parameters to specified metric, or specify as string, which will use default parameters.\n", "\n", @@ -156,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:22.562180Z", @@ -187,7 +200,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:26.664594Z", @@ -204,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:26.679608Z", @@ -212,48 +225,7 @@ }, "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"total_items\": 2,\n", - " \"empty_items\": 0,\n", - " \"bleu_1\": {\n", - " \"score\": 0.8823529411764706,\n", - " \"precisions\": [\n", - " 0.8823529411764706\n", - " ],\n", - " \"brevity_penalty\": 1.0,\n", - " \"length_ratio\": 1.0,\n", - " \"translation_length\": 11,\n", - " \"reference_length\": 11\n", - " },\n", - " \"bleu_2\": {\n", - " \"score\": 0.7531446678801508,\n", - " \"precisions\": [\n", - " 0.8823529411764706,\n", - " 0.6428571428571429\n", - " ],\n", - " \"brevity_penalty\": 1.0,\n", - " \"length_ratio\": 1.0,\n", - " \"translation_length\": 11,\n", - " \"reference_length\": 11\n", - " },\n", - " \"meteor\": {\n", - " \"score\": 0.727184593644221\n", - " },\n", - " \"rouge\": {\n", - " \"rouge1\": 0.7783882783882783,\n", - " \"rouge2\": 0.5925324675324675,\n", - " \"rougeL\": 0.7426739926739926,\n", - " \"rougeLsum\": 0.7426739926739926\n", - " }\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "# Display results\n", "print(json.dumps(scores, indent=4))" @@ -270,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:29.552689Z", @@ -289,7 
+261,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T19:59:30.130953Z", @@ -297,22 +269,7 @@ }, "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"total_items\": 3,\n", - " \"empty_items\": 0,\n", - " \"squad\": {\n", - " \"exact_match\": 0.33333333333333337,\n", - " \"f1\": 0.8222222222222223\n", - " }\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "qa_jury = Jury(metrics=QA_METRICS, run_concurrent=False)\n", "scores = qa_jury(predictions=qa_predictions, references=qa_references)\n", @@ -330,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T20:17:28.152162Z", @@ -422,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T20:17:28.353117Z", @@ -439,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-10-02T20:17:28.583593Z", @@ -447,25 +404,7 @@ }, "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"total_items\": 3,\n", - " \"empty_items\": 0,\n", - " \"squad\": {\n", - " \"exact_match\": 0.33333333333333337,\n", - " \"f1\": 0.8222222222222223\n", - " },\n", - " \"word_match\": {\n", - " \"score\": 0.7222222222222222\n", - " }\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "qa_jury = Jury(metrics=QA_METRICS, run_concurrent=False)\n", "scores = qa_jury(predictions=qa_predictions, references=qa_references)\n",