add notebook and result

Browse files

Files changed (2) hide show

results.csv +0 -0
trainer-skripsiiii-sft-mt5.ipynb +1 -0

results.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

trainer-skripsiiii-sft-mt5.ipynb ADDED Viewed

	@@ -0,0 +1 @@

+ {"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"accelerator":"GPU","colab":{"gpuType":"T4","provenance":[]},"kaggle":{"accelerator":"gpu","dataSources":[],"dockerImageVersionId":30919,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true},"widgets":{"application/vnd.jupyter.widget-state+json":{"91d408aab9e64001bb8af13ba5cf5ba0":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_78cf1b4bf80f44379037e2a95f1fc92f","IPY_MODEL_c2f8d43e961b4bf58914d56ae1033656","IPY_MODEL_6bb0af0a5af74002b2665f5536695f4c"],"layout":"IPY_MODEL_c7198f52805d47189fe66fdf5c4332e0"}},"78cf1b4bf80f44379037e2a95f1fc92f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_665bde3407cf4f528ced9de85e23c74f","placeholder":"","style":"IPY_MODEL_2146bb2bf0964998bf1dd171aadc5fba","value":"Map: 
52%"}},"c2f8d43e961b4bf58914d56ae1033656":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"danger","description":"","description_tooltip":null,"layout":"IPY_MODEL_78f2b31ae81d4dfcba9cec59cc7a3f8f","max":1931,"min":0,"orientation":"horizontal","style":"IPY_MODEL_a4e1b8be4b7649fabc0ae5c2a371e0b1","value":1000}},"6bb0af0a5af74002b2665f5536695f4c":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7d50b66a0f2c4c15abc290dd2f8c9627","placeholder":"","style":"IPY_MODEL_b2370278e33c4c8ea9d7801d189d6e95","value":" 1000/1931 [00:04<00:02, 356.29 
examples/s]"}},"c7198f52805d47189fe66fdf5c4332e0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"665bde3407cf4f528ced9de85e23c74f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2146bb2bf0964998bf1dd171aadc5fba":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"78f2b31ae81d4dfcba9cec59cc7a3f8f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a4e1b8be4b7649fabc0ae5c2a371e0b1":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"7d50b66a0f2c4c15abc290dd2f8c9627":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b2370278e33c4c8ea9d7801d189d6e95":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"!pip install --upgrade pip --quiet\n!pip install torch torchdata --disable-pip-version-check --quiet\n!pip install transformers datasets evaluate trl rouge_score loralib peft --quiet 
--upgrade","metadata":{"id":"bY14oaaJfiAt","trusted":true,"execution":{"iopub.status.busy":"2025-04-12T02:11:01.031862Z","iopub.execute_input":"2025-04-12T02:11:01.032140Z","iopub.status.idle":"2025-04-12T02:11:26.950722Z","shell.execute_reply.started":"2025-04-12T02:11:01.032110Z","shell.execute_reply":"2025-04-12T02:11:26.949535Z"}},"outputs":[{"name":"stdout","text":"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m60.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.4/10.4 MB\u001b[0m \u001b[31m135.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25h Building wheel for rouge_score (setup.py) ... \u001b[?25l\u001b[?25hdone\n","output_type":"stream"}],"execution_count":1},{"cell_type":"markdown","source":"- download dataset\n","metadata":{"id":"5RpSZ3SKPEMs"}},{"cell_type":"code","source":"# dataset (train:10%, test: 100%, dev:100%)\n!gdown https://drive.google.com/file/d/1p3tpxL48OovxJu4F_gQlootqUHIwc3yF/view?usp=sharing --fuzzy\n# dataset (train:100%, test: 100%, dev:100%)\n# !gdown https://drive.google.com/file/d/1RzYl93ukj_FLGew2SH2Or0xLW50MdeZu/view?usp=sharing --fuzzy\n!tar xf data.tar.bz2","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NJZXuXWof8NF","outputId":"07754704-cb68-4f49-b137-b9acf62363de","trusted":true,"execution":{"iopub.status.busy":"2025-04-11T14:48:45.512479Z","iopub.execute_input":"2025-04-11T14:48:45.512743Z","iopub.status.idle":"2025-04-11T14:49:06.920701Z","shell.execute_reply.started":"2025-04-11T14:48:45.512709Z","shell.execute_reply":"2025-04-11T14:49:06.919666Z"}},"outputs":[{"name":"stdout","text":"Downloading...\nFrom: https://drive.google.com/uc?id=1p3tpxL48OovxJu4F_gQlootqUHIwc3yF\nTo: /kaggle/working/data.tar.bz2\n100%|██████████████████████████████████████| 7.08M/7.08M [00:00<00:00, 
14.6MB/s]\n","output_type":"stream"}],"execution_count":2},{"cell_type":"markdown","source":"# Import Library\n","metadata":{"id":"SKd35sp1PEMx"}},{"cell_type":"code","source":"import torch\nfrom datasets import load_from_disk\nfrom trl import SFTTrainer, SFTConfig\nfrom peft import LoraConfig, get_peft_model, TaskType, PeftModel\nfrom transformers import (\n AutoModelForSeq2SeqLM,\n EncoderDecoderModel,\n AutoModelForCausalLM,\n AutoTokenizer,\n DataCollatorForSeq2Seq,\n TrainingArguments,\n Trainer,\n TrainerCallback,\n Seq2SeqTrainer,\n Seq2SeqTrainingArguments,\n GenerationConfig,\n pipeline\n)\n\nimport os\nimport evaluate\n\nimport numpy as np\nimport pandas as pd\nimport datetime as dt","metadata":{"id":"KDdos8MAfiA0","trusted":true,"execution":{"iopub.status.busy":"2025-04-12T02:11:26.952370Z","iopub.execute_input":"2025-04-12T02:11:26.952703Z","iopub.status.idle":"2025-04-12T02:11:50.822323Z","shell.execute_reply.started":"2025-04-12T02:11:26.952671Z","shell.execute_reply":"2025-04-12T02:11:50.821636Z"}},"outputs":[],"execution_count":2},{"cell_type":"code","source":"os.environ[\"WANDB_PROJECT\"]=\"skripsiiii\"\nos.environ[\"WANDB_WATCH\"]=\"false\"\n!wandb online\n\nWANDB_API_KEY = \"e670a715cf2aa97459e943ae3e2d2f6cdb600d5e\"\n!wandb login {WANDB_API_KEY} --relogin\n\n# !wandb offline\n# !wandb disabled","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OnEF3D1pPEM5","outputId":"d33e6802-4a5d-45a6-8df4-0f06f3e1f70a","trusted":true,"execution":{"iopub.status.busy":"2025-04-12T02:11:50.823357Z","iopub.execute_input":"2025-04-12T02:11:50.823577Z","iopub.status.idle":"2025-04-12T02:11:54.331526Z","shell.execute_reply.started":"2025-04-12T02:11:50.823559Z","shell.execute_reply":"2025-04-12T02:11:54.330376Z"}},"outputs":[{"name":"stdout","text":"W&B online. 
def load_model(
    path_base_model, device=device
):
    """Load a seq2seq checkpoint and its tokenizer.

    Args:
        path_base_model: Model identifier or checkpoint directory passed to
            both ``from_pretrained`` calls.
        device: Device the model is moved to. Defaults to the notebook-level
            ``device`` as it was when this function was defined.

    Returns:
        Tuple ``(model, tokenizer)``.
    """
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(path_base_model)
    seq2seq_model = seq2seq_model.to(device)
    seq2seq_tokenizer = AutoTokenizer.from_pretrained(path_base_model)

    # Show the generation defaults that ship with the loaded checkpoint.
    print(seq2seq_model.generation_config)

    return seq2seq_model, seq2seq_tokenizer
def print_number_of_trainable_model_parameters(model):
    """Build a three-line summary of trainable vs. total parameter counts.

    Despite the name, nothing is printed: the formatted summary is returned
    and the caller decides whether to print it.

    Args:
        model: Object exposing ``named_parameters()`` whose parameters provide
            ``numel()`` and ``requires_grad``.

    Returns:
        A string with the trainable count, the total count and the trainable
        percentage (two decimals).

    Raises:
        ZeroDivisionError: If the model has no parameters at all.
    """
    # (element count, is trainable) for every parameter tensor.
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
    total = sum(count for count, _ in sizes)
    trainable = sum(count for count, needs_grad in sizes if needs_grad)
    share = 100 * trainable / total
    return (
        f"trainable model parameters: {trainable}\n"
        f"all model parameters: {total}\n"
        f"percentage of trainable model parameters: {share:.2f}%"
    )
def mapping_dataset(feature, max_length=None):
    """Tokenize a batch of articles and reference summaries.

    Written for ``Dataset.map(..., batched=True)``; relies on the
    notebook-level ``tokenizer``.

    Nothing is padded here on purpose:

    * Padding the labels at map time stores real pad-token ids in ``label``,
      so the loss is also computed on padding positions. Leaving the labels
      ragged lets ``DataCollatorForSeq2Seq`` pad them with its ignore value
      (``-100`` by default) when it builds each training batch.
    * Padding the inputs at map time makes every row of a map batch the same
      width, which turns ``len_input_ids`` into the padded width instead of
      the real token count and wastes compute on padding.

    Args:
        feature: Batch mapping with ``clean_article`` and ``clean_summary``
            lists of strings. It is updated in place and returned.
        max_length: Maximum number of input tokens kept per article
            (articles are truncated; summaries are never truncated).

    Returns:
        The same ``feature`` mapping with stripped ``clean_article`` plus the
        new ``input_ids``, ``attention_mask``, ``label`` and
        ``len_input_ids`` columns.
    """
    feature["clean_article"] = [
        article.strip() for article in feature["clean_article"]
    ]

    # Variable-length encodings; batch-time padding is the collator's job.
    encoded_articles = tokenizer(
        feature["clean_article"], truncation=True, max_length=max_length
    )
    input_ids = encoded_articles["input_ids"]

    feature["input_ids"] = input_ids
    feature["attention_mask"] = encoded_articles["attention_mask"]

    feature["label"] = tokenizer(
        feature["clean_summary"], truncation=False
    )["input_ids"]

    # True (unpadded, post-truncation) token count of every article.
    feature["len_input_ids"] = [len(ids) for ids in input_ids]

    return feature
def compute_reward(
    predictions,
    labels,
    return_dict=True,
    include_pred_label=False,
    is_already_decode=False,
    use_aggregator=True,
):
    """Score predictions against references with ROUGE-L.

    Relies on the notebook-level ``tokenizer`` and ``metric`` objects.

    Args:
        predictions: Token-id array of generated summaries, or a list of
            strings when ``is_already_decode`` is true.
        labels: Token-id array of reference summaries, or a list of strings
            when ``is_already_decode`` is true.
        return_dict: Return the metric's result dict when true, otherwise only
            its ``"rougeL"`` entry.
        include_pred_label: Also return the decoded predictions and references.
        is_already_decode: Skip decoding because the inputs are already text.
        use_aggregator: Forwarded to ``metric.compute``.

    Returns:
        The ROUGE-L result, or ``(result, decoded_preds, decoded_labels)``
        when ``include_pred_label`` is true.
    """
    if is_already_decode:
        decoded_preds, decoded_labels = predictions, labels
    else:
        # -100 marks ignored/padded positions and cannot be decoded, so swap
        # it for the real pad token id. `pad_token_type_id` is the *segment*
        # id used for padding, not a vocabulary id, and must not be used here.
        pad_id = tokenizer.pad_token_id
        labels = np.asarray(labels)
        predictions = np.asarray(predictions)
        labels = np.where(labels == -100, pad_id, labels)
        predictions = np.where(predictions == -100, pad_id, predictions)

        decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
        decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = metric.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        rouge_types=["rougeL"],
        use_aggregator=use_aggregator,
        use_stemmer=True,
    )

    if not return_dict:
        result = result["rougeL"]

    if include_pred_label:
        return result, decoded_preds, decoded_labels
    return result


def compute_metrics(eval_pred):
    """Trainer-style metrics hook: unpack ``(predictions, labels)`` and score them."""
    predictions, labels = eval_pred
    return compute_reward(predictions, labels)
report_to=\"tensorboard\",\n)","metadata":{"id":"56DKgTev-esG","trusted":true,"execution":{"iopub.status.busy":"2025-04-12T02:13:00.094377Z","iopub.execute_input":"2025-04-12T02:13:00.094600Z","iopub.status.idle":"2025-04-12T02:13:00.137676Z","shell.execute_reply.started":"2025-04-12T02:13:00.094581Z","shell.execute_reply":"2025-04-12T02:13:00.136987Z"}},"outputs":[],"execution_count":13},{"cell_type":"code","source":"data_collator = DataCollatorForSeq2Seq(\n tokenizer=tokenizer, padding=\"longest\"\n)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-12T02:13:00.139663Z","iopub.execute_input":"2025-04-12T02:13:00.139885Z","iopub.status.idle":"2025-04-12T02:13:00.143789Z","shell.execute_reply.started":"2025-04-12T02:13:00.139867Z","shell.execute_reply":"2025-04-12T02:13:00.142955Z"},"id":"Wl8WxQ38RSnb"},"outputs":[],"execution_count":14},{"cell_type":"code","source":"trainer = Seq2SeqTrainer(\n model=base_model,\n args=training_args,\n processing_class=tokenizer,\n data_collator=data_collator,\n train_dataset=dataset[\"train\"],\n eval_dataset=dataset[\"validation\"],\n)","metadata":{"id":"wwCMuOGsNx3M","trusted":true,"execution":{"iopub.status.busy":"2025-04-12T02:13:00.144690Z","iopub.execute_input":"2025-04-12T02:13:00.144957Z","iopub.status.idle":"2025-04-12T02:13:00.345607Z","shell.execute_reply.started":"2025-04-12T02:13:00.144924Z","shell.execute_reply":"2025-04-12T02:13:00.344955Z"}},"outputs":[],"execution_count":15},{"cell_type":"code","source":"trainer.evaluate()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-11T14:50:06.638606Z","iopub.execute_input":"2025-04-11T14:50:06.638967Z","iopub.status.idle":"2025-04-11T14:50:47.062995Z","shell.execute_reply.started":"2025-04-11T14:50:06.638934Z","shell.execute_reply":"2025-04-11T14:50:47.062088Z"},"colab":{"base_uri":"https://localhost:8080/","height":318},"id":"pHmi4_EHRSnb","outputId":"fbd55164-3cba-4388-f5bb-6aaf875cfa29"},"outputs":[{"name":"stderr","text":"Passing 
a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n <div>\n \n <progress value='620' max='310' style='width:300px; height:20px; vertical-align: middle;'></progress>\n [310/310 09:25]\n </div>\n "},"metadata":{}},{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n\u001b[34m\u001b[1mwandb\u001b[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.\n\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmalifnasrulloh\u001b[0m (\u001b[33mmalifnasrullohhh\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Tracking run with wandb version 0.19.1"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Run data is saved locally in <code>/kaggle/working/wandb/run-20250411_145041-r36zy2ce</code>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Syncing run <strong><a href='https://wandb.ai/malifnasrullohhh/skripsiiii/runs/r36zy2ce' target=\"_blank\">results-2025-04-11 14:50:06.350109</a></strong> to <a href='https://wandb.ai/malifnasrullohhh/skripsiiii' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View project at <a href='https://wandb.ai/malifnasrullohhh/skripsiiii' target=\"_blank\">https://wandb.ai/malifnasrullohhh/skripsiiii</a>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View run at <a href='https://wandb.ai/malifnasrullohhh/skripsiiii/runs/r36zy2ce' target=\"_blank\">https://wandb.ai/malifnasrullohhh/skripsiiii/runs/r36zy2ce</a>"},"metadata":{}},{"execution_count":17,"output_type":"execute_result","data":{"text/plain":"{'eval_loss': 31.720582962036133,\n 'eval_model_preparation_time': 0.0082,\n 'eval_runtime': 28.2996,\n 'eval_samples_per_second': 87.422,\n 'eval_steps_per_second': 
10.954}"},"metadata":{}}],"execution_count":17},{"cell_type":"code","source":"trainer.train()","metadata":{"id":"eQax_yXO5cZC","trusted":true,"execution":{"iopub.status.busy":"2025-04-11T14:50:47.064060Z","iopub.execute_input":"2025-04-11T14:50:47.064364Z","iopub.status.idle":"2025-04-11T16:19:42.860666Z","shell.execute_reply.started":"2025-04-11T14:50:47.064333Z","shell.execute_reply":"2025-04-11T16:19:42.859640Z"}},"outputs":[{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n <div>\n \n <progress value='12120' max='12120' style='width:300px; height:20px; vertical-align: middle;'></progress>\n [12120/12120 1:28:53, Epoch 10/10]\n </div>\n <table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>Epoch</th>\n <th>Training Loss</th>\n <th>Validation Loss</th>\n <th>Model Preparation Time</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>1</td>\n <td>1.547600</td>\n <td>1.994755</td>\n <td>0.008200</td>\n </tr>\n <tr>\n <td>2</td>\n <td>0.932500</td>\n <td>1.876694</td>\n <td>0.008200</td>\n </tr>\n <tr>\n <td>3</td>\n <td>0.966400</td>\n <td>1.750615</td>\n <td>0.008200</td>\n </tr>\n <tr>\n <td>4</td>\n <td>0.548300</td>\n <td>1.701512</td>\n <td>0.008200</td>\n </tr>\n <tr>\n <td>5</td>\n <td>0.600700</td>\n <td>1.678156</td>\n <td>0.008200</td>\n </tr>\n <tr>\n <td>6</td>\n <td>0.943900</td>\n <td>1.649590</td>\n <td>0.008200</td>\n </tr>\n <tr>\n <td>7</td>\n <td>0.727100</td>\n <td>1.629692</td>\n <td>0.008200</td>\n </tr>\n <tr>\n <td>8</td>\n <td>0.551500</td>\n <td>1.640368</td>\n <td>0.008200</td>\n </tr>\n <tr>\n <td>9</td>\n <td>0.706400</td>\n <td>1.635413</td>\n <td>0.008200</td>\n </tr>\n <tr>\n <td>10</td>\n <td>0.469800</td>\n <td>1.635159</td>\n <td>0.008200</td>\n </tr>\n </tbody>\n</table><p>"},"metadata":{}},{"execution_count":18,"output_type":"execute_result","data":{"text/plain":"TrainOutput(global_step=12120, training_loss=1.5098526220362964, 
def get_predict(
    save_path,
    trainer,
    data,
    length_penalty=0.6,
    max_length=64,
    no_repeat_ngram_size=3,
    num_beams=8,
):
    """Generate summaries for ``data``, score them and save a per-example CSV.

    Args:
        save_path: Directory that receives ``results.csv``; created (including
            parents) when it does not exist yet.
        trainer: Object whose ``predict(data, **generation_kwargs)`` returns a
            result exposing ``predictions`` and ``label_ids``.
        data: Dataset to predict on; ``data["clean_article"]`` supplies the
            article column of the report.
        length_penalty: Generation setting forwarded to ``trainer.predict``.
        max_length: Generation setting forwarded to ``trainer.predict``.
        no_repeat_ngram_size: Generation setting forwarded to ``trainer.predict``.
        num_beams: Generation setting forwarded to ``trainer.predict``.

    Returns:
        ``pandas.DataFrame`` with the columns ``article``, ``summary``,
        ``pred_summary`` and ``rouge-l`` (one row per example); the same frame
        is written to ``<save_path>/results.csv``.
    """
    result = trainer.predict(
        data,
        length_penalty=length_penalty,
        max_length=max_length,
        no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams,
    )
    # use_aggregator=False keeps one ROUGE-L score per example.
    scores, preds, labels = compute_reward(
        result.predictions,
        result.label_ids,
        return_dict=False,
        include_pred_label=True,
        use_aggregator=False,
    )

    results = pd.DataFrame(
        {
            "article": data["clean_article"],
            "summary": labels,
            "pred_summary": preds,
            "rouge-l": scores,
        }
    )

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_path, exist_ok=True)
    results.to_csv(os.path.join(save_path, "results.csv"), index=False)
    return results
trainer=inference_trainer, data=dataset[\"test\"]\n )","metadata":{"id":"w1odB2jIPENq","trusted":true,"execution":{"iopub.status.busy":"2025-04-12T02:25:50.951548Z","iopub.execute_input":"2025-04-12T02:25:50.951834Z","iopub.status.idle":"2025-04-12T02:45:15.514802Z","shell.execute_reply.started":"2025-04-12T02:25:50.951812Z","shell.execute_reply":"2025-04-12T02:45:15.513883Z"}},"outputs":[{"name":"stdout","text":"GenerationConfig {\n \"decoder_start_token_id\": 0,\n \"eos_token_id\": 1,\n \"pad_token_id\": 0\n}\n\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":""},"metadata":{}},{"name":"stdout","text":"GenerationConfig {\n \"decoder_start_token_id\": 0,\n \"eos_token_id\": 1,\n \"pad_token_id\": 0\n}\n\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":""},"metadata":{}}],"execution_count":29},{"cell_type":"code","source":"for i in results.keys():\n print(i)\n print(results[i].describe())","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-12T02:45:15.515995Z","iopub.execute_input":"2025-04-12T02:45:15.516267Z","iopub.status.idle":"2025-04-12T02:45:15.531360Z","shell.execute_reply.started":"2025-04-12T02:45:15.516245Z","shell.execute_reply":"2025-04-12T02:45:15.530625Z"}},"outputs":[{"name":"stdout","text":"checkpoint-12120\n rouge-l\ncount 1931.000000\nmean 0.243050\nstd 0.111563\nmin 0.000000\n25% 0.162162\n50% 0.232558\n75% 0.318182\nmax 0.693878\ncheckpoint-10908\n rouge-l\ncount 1931.000000\nmean 0.242967\nstd 0.111767\nmin 0.000000\n25% 0.162162\n50% 0.232558\n75% 0.318182\nmax 0.693878\n","output_type":"stream"}],"execution_count":30},{"cell_type":"code","source":"# for i in results.keys():\n# print(i)\n# 
print(results[i].describe())","metadata":{"id":"y8xzqzQuPENs","trusted":true,"execution":{"iopub.status.busy":"2025-04-11T16:19:46.813905Z","iopub.status.idle":"2025-04-11T16:19:46.814310Z","shell.execute_reply":"2025-04-11T16:19:46.814133Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"# !cd \"/kaggle/working/results-2025-02-10 06:53:32.835982/\" && zip checkpoint-4848.zip checkpoint-4848 -r && mv checkpoint-4848.zip ../\n# !zip 'results-2025-04-05 19:56:18.674693.zip' 'results-2025-04-05 19:56:18.674693/checkpoint-24240' -r\n\n# NOTE: the session token in this proxy URL was redacted; never commit live proxy tokens.\n# !wget https://kkb-production.jupyter-proxy.kaggle.net/k/226243528/REDACTED_PROXY_TOKEN/proxy/files/results-2025-03-06%2018%3A21%3A25.845145.zip\n# !unzip \"results-2025-03-06 18:21:25.845145.zip\"","metadata":{"id":"la7dYZelPENs","trusted":true,"execution":{"iopub.status.busy":"2025-04-11T16:19:46.815139Z","iopub.status.idle":"2025-04-11T16:19:46.815430Z","shell.execute_reply":"2025-04-11T16:19:46.815315Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"# !tar cjf checkpoint-97.tar.bz2 checkpoint-97\n# !cd resultss && tar cjf checkpoint-970.tar.bz2 checkpoint-970\n\n# !cd \"/kaggle/working/results-2025-01-10 15:17:35.819440\" && zip checkpoint-4848.zip checkpoint-4848 -r\n# !mv \"/kaggle/working/results-2025-01-10 15:17:35.819440/checkpoint-4848.zip\" checkpoint-4848.zip\n\n# from IPython import display\n# display.FileLink(\"results-2025-04-05 19:56:18.674693.zip\")\n# display.FileLink(\"result-194-5e-5.csv.csv\")\n# !rm -rf \"/kaggle/working/checkpoint-3030.zip\"\n# !find -name 'results*' -type d -exec rm -rf {} 
+","metadata":{"id":"Ni1Ge8dJPENy","trusted":true,"execution":{"iopub.status.busy":"2025-04-11T16:19:46.817153Z","iopub.status.idle":"2025-04-11T16:19:46.817508Z","shell.execute_reply":"2025-04-11T16:19:46.817349Z"}},"outputs":[],"execution_count":null}]}