diff --git a/notebooks/Optimization.ipynb b/notebooks/Optimization.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e593332f3f789258b94007e8a6cb3b698762b79f --- /dev/null +++ b/notebooks/Optimization.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "df593963", + "metadata": {}, + "source": [ + "# Optimization Example\n", + "\n", + "This notebook shows an example of how to use linear programming to solve the following optimization problem.\n", + "\n", + "## Problem Description\n", + "\n", + "TBD\n", + "\n", + "## General approach\n", + "\n", + "1. Create all possible combinations of papers given the maximum number of papers for a time slot.\n", + "2. Remove combinations which are impossible to satisfy because the papers' reviewers have conflicting time slot preferences.\n", + "3. Add a constraint to limit the number of time slots.\n", + "4. Add a constraint to make sure that a paper can only appear in up to one time slot.\n", + "5. Solve the model." + ] + }, + { + "cell_type": "markdown", + "id": "0e2d98ee", + "metadata": {}, + "source": [ + "## Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a732db1", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install pulp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64604da8", + "metadata": {}, + "outputs": [], + "source": [ + "import pulp\n", + "from dataclasses import dataclass, field\n", + "from typing import List" + ] + }, + { + "cell_type": "markdown", + "id": "97cdf625", + "metadata": {}, + "source": [ + "## Data structures" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7036ad2e", + "metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class Reviewer:\n", + " id: str\n", + " time_slots: List[int]\n", + "\n", + " def __repr__(self) -> str:\n", + " return self.id\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8352c4f", + "metadata": {}, + 
"outputs": [], + "source": [ + "@dataclass(eq=True, frozen=True)\n", + "class Paper:\n", + " id: str = field(hash=True)\n", + " reviewers: List[Reviewer] = field(compare=False, hash=False)\n", + " score: int = field(compare=False, hash=False)\n", + "\n", + " def __repr__(self) -> str:\n", + " result = f'{self.id} (score: {self.score})'\n", + "\n", + " for rev in self.reviewers:\n", + " result += f'\\n{rev}'\n", + "\n", + " return result" + ] + }, + { + "cell_type": "markdown", + "id": "405bf280", + "metadata": {}, + "source": [ + "## Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b90cf13", + "metadata": {}, + "outputs": [], + "source": [ + "def score(time_slot) -> int:\n", + " '''\n", + " Takes a time slot (i.e. a list of papers) and calculates a score.\n", + " The scores takes the number of papers in the time slot and the review score into account.\n", + " '''\n", + " \n", + " paper_count = len(time_slot)\n", + " \n", + " if paper_count == 1:\n", + " if len(time_slot[0].reviewers) > 0:\n", + " review_score = 2 - abs(time_slot[0].score)\n", + " return paper_count + review_score\n", + " else:\n", + " return 0\n", + "\n", + " result = paper_count\n", + "\n", + " for p in time_slot:\n", + " review_score = 2 - abs(p.score)\n", + "\n", + " if len(p.reviewers) == 1:\n", + " result += review_score\n", + " elif len(p.reviewers) > 1:\n", + " result += review_score\n", + " \n", + " # for multiple reviewers, we could also take into account the number of reviewers or how many\n", + " # reviewers are available at the same time, but currently this is not used\n", + " \n", + " # all_ts_lists = []\n", + " # for rev in p.reviewers:\n", + " # if len(rev.time_slots) > 0:\n", + " # all_ts_lists.append(rev.time_slots)\n", + "\n", + " # inters = set.intersection(*[set(x) for x in all_ts_lists])\n", + " # if len(inters) > 0:\n", + " # result += review_score\n", + "\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "dcd2472b", + "metadata": {}, + "outputs": [], + "source": [ + "def check(time_slot) -> bool:\n", + " '''\n", + " Takes a time slot (i.e. a list of papers) and checks if there are any contradictions, i.e., for every paper\n", + " with at least one reviewer, there is a time slot where at least one reviewer for every paper is available.\n", + " '''\n", + " all_ts_lists = []\n", + "\n", + " for p in time_slot:\n", + " all_p_ts = []\n", + "\n", + " for rev in p.reviewers:\n", + " if len(rev.time_slots) > 0:\n", + " all_p_ts.extend(rev.time_slots)\n", + "\n", + " all_ts_lists.append(all_p_ts)\n", + "\n", + " if len(all_ts_lists) > 0:\n", + " result = len(set.intersection(*[set(x) for x in all_ts_lists]))\n", + " return result >= 1\n", + " else:\n", + " return True" + ] + }, + { + "cell_type": "markdown", + "id": "a09dfb77", + "metadata": {}, + "source": [ + "## Test data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61224383", + "metadata": {}, + "outputs": [], + "source": [ + "r1 = Reviewer('Reviewer1', [0, 1])\n", + "r2 = Reviewer('Reviewer2', [0, 1])\n", + "r3 = Reviewer('Reviewer3', [0])\n", + "r4 = Reviewer('Reviewer4', [1])\n", + "r5 = Reviewer('Reviewer5', [2])\n", + "r6 = Reviewer('Reviewer6', [2])\n", + "r7 = Reviewer('Reviewer7', [2])\n", + "r8 = Reviewer('Reviewer8', [2])\n", + "r9 = Reviewer('Reviewer9', [0])\n", + "r10 = Reviewer('Reviewer10', [1])\n", + "\n", + "papers = []\n", + "\n", + "papers.append(Paper('Paper1', [r1, r2], 2))\n", + "papers.append(Paper('Paper2', [r1], 0))\n", + "papers.append(Paper('Paper3', [r3, r4], 0))\n", + "papers.append(Paper('Paper4', [r5, r6], -2))\n", + "papers.append(Paper('Paper5', [r7, r8], 0))\n", + "papers.append(Paper('Paper6', [r9, r10], -1))" + ] + }, + { + "cell_type": "markdown", + "id": "56c78a45", + "metadata": {}, + "source": [ + "## The Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "daafd13a", + "metadata": {}, + "outputs": [], + "source": [ + 
"max_time_slots = 2\n", + "max_slot_size = 2\n", + "\n", + "all_time_slots = [tuple(c) for c in pulp.allcombinations(papers, max_slot_size)]\n", + "possible_time_slots = []\n", + "\n", + "for ts in all_time_slots:\n", + " if check(ts):\n", + " possible_time_slots.append(ts)\n", + "\n", + "x = pulp.LpVariable.dicts(\n", + " \"time_slot\", possible_time_slots, lowBound=0, upBound=1, cat=pulp.LpInteger\n", + ")\n", + "\n", + "review_model = pulp.LpProblem(\"Review Model\", pulp.LpMaximize)\n", + "review_model += pulp.lpSum([score(ts) * x[ts] for ts in possible_time_slots])\n", + "\n", + "review_model += (\n", + " pulp.lpSum([x[ts] for ts in possible_time_slots]) <= max_time_slots,\n", + " \"Maximum_number_of_time_slots\",\n", + ")\n", + "\n", + "for paper in papers:\n", + " review_model += (\n", + " pulp.lpSum([x[ts] for ts in possible_time_slots if paper in ts]) <= 1,\n", + " f\"Must_assign_{paper}\",\n", + " )\n", + "\n", + "status = review_model.solve()\n", + "\n", + "if status == 1:\n", + " slot_count = 1\n", + "\n", + " print(f\"The best time slots are:\")\n", + " for ts in possible_time_slots:\n", + " if x[ts].value() == 1.0:\n", + " print(f'\\nSlot {slot_count}:')\n", + "\n", + " for p in ts:\n", + " print(f'\\n{p}')\n", + "\n", + " slot_count += 1\n", + "\n", + "else:\n", + " print('Problem could not be solved!')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "856d62e2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}