Add optimization notebook

728bc023 · Frederik Arnold · 2afa379a · 728bc023
Commit 728bc023 authored 1 year ago by Frederik Arnold
--- a/notebooks/Optimization.ipynb
+++ b/notebooks/Optimization.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "df593963",
+   "metadata": {},
+   "source": [
+    "# Optimization Example\n",
+    "\n",
+    "This notebook shows an example of how to use linear programming to solve the following optimization problem.\n",
+    "\n",
+    "## Problem Description\n",
+    "\n",
+    "TBD\n",
+    "\n",
+    "## General approach\n",
+    "\n",
+    "1. Create all possible combinations of papers given the maximum number of papers for a time slot.\n",
+    "2. Remove combinations which are impossible to satisfy because authors have conflicting time slot preferences.\n",
+    "3. Add a constraint to limit the number of time slots.\n",
+    "4. Add a constraint to make sure that a paper can only appear in up to one time slot.\n",
+    "5. Solve"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0e2d98ee",
+   "metadata": {},
+   "source": [
+    "## Dependencies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5a732db1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install pulp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "64604da8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pulp\n",
+    "from dataclasses import dataclass, field\n",
+    "from typing import List"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "97cdf625",
+   "metadata": {},
+   "source": [
+    "## Data structures"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7036ad2e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@dataclass\n",
+    "class Reviewer:\n",
+    "    id: str\n",
+    "    time_slots: List[int]\n",
+    "\n",
+    "    def __repr__(self) -> str:\n",
+    "        return self.id\n",
+    "    \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e8352c4f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@dataclass(eq=True, frozen=True)\n",
+    "class Paper:\n",
+    "    id: str = field(hash=True)\n",
+    "    reviewers: List[Reviewer] = field(compare=False, hash=False)\n",
+    "    score: int = field(compare=False, hash=False)\n",
+    "\n",
+    "    def __repr__(self) -> str:\n",
+    "        result = f'{self.id} (score: {self.score})'\n",
+    "\n",
+    "        for rev in self.reviewers:\n",
+    "            result += f'\\n{rev}'\n",
+    "\n",
+    "        return result"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "405bf280",
+   "metadata": {},
+   "source": [
+    "## Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b90cf13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def score(time_slot) -> int:\n",
+    "    '''\n",
+    "    Takes a time slot (i.e. a list of papers) and calculates a score.\n",
+    "    The scores takes the number of papers in the time slot and the review score into account.\n",
+    "    '''\n",
+    "    \n",
+    "    paper_count = len(time_slot)\n",
+    "    \n",
+    "    if paper_count == 1:\n",
+    "        if len(time_slot[0].reviewers) > 0:\n",
+    "            review_score = 2 - abs(time_slot[0].score)\n",
+    "            return paper_count + review_score\n",
+    "        else:\n",
+    "            return 0\n",
+    "\n",
+    "    result = paper_count\n",
+    "\n",
+    "    for p in time_slot:\n",
+    "        review_score = 2 - abs(p.score)\n",
+    "\n",
+    "        if len(p.reviewers) == 1:\n",
+    "            result += review_score\n",
+    "        elif len(p.reviewers) > 1:\n",
+    "            result += review_score\n",
+    "            \n",
+    "            # for multiple reviewers, we could also take into account the number of reviewers or how many\n",
+    "            # reviewers are available at the same time, but currently this is not used\n",
+    "            \n",
+    "            # all_ts_lists = []\n",
+    "            # for rev in p.reviewers:\n",
+    "            #     if len(rev.time_slots) > 0:\n",
+    "            #         all_ts_lists.append(rev.time_slots)\n",
+    "\n",
+    "            # inters = set.intersection(*[set(x) for x in all_ts_lists])\n",
+    "            # if len(inters) > 0:\n",
+    "            #     result += review_score\n",
+    "\n",
+    "    return result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dcd2472b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def check(time_slot) -> bool:\n",
+    "    '''\n",
+    "    Takes a time slot (i.e. a list of papers) and checks if there are any contradictions, i.e., for every paper\n",
+    "    with at least one reviewer, there is a time slot where at least one reviewer for every paper is available.\n",
+    "    '''\n",
+    "    all_ts_lists = []\n",
+    "\n",
+    "    for p in time_slot:\n",
+    "        all_p_ts = []\n",
+    "\n",
+    "        for rev in p.reviewers:\n",
+    "            if len(rev.time_slots) > 0:\n",
+    "                all_p_ts.extend(rev.time_slots)\n",
+    "\n",
+    "        all_ts_lists.append(all_p_ts)\n",
+    "\n",
+    "    if len(all_ts_lists) > 0:\n",
+    "        result = len(set.intersection(*[set(x) for x in all_ts_lists]))\n",
+    "        return result >= 1\n",
+    "    else:\n",
+    "        return True"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a09dfb77",
+   "metadata": {},
+   "source": [
+    "## Test data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "61224383",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "r1 = Reviewer('Reviewer1', [0, 1])\n",
+    "r2 = Reviewer('Reviewer2', [0, 1])\n",
+    "r3 = Reviewer('Reviewer3', [0])\n",
+    "r4 = Reviewer('Reviewer4', [1])\n",
+    "r5 = Reviewer('Reviewer5', [2])\n",
+    "r6 = Reviewer('Reviewer6', [2])\n",
+    "r7 = Reviewer('Reviewer7', [2])\n",
+    "r8 = Reviewer('Reviewer8', [2])\n",
+    "r9 = Reviewer('Reviewer9', [0])\n",
+    "r10 = Reviewer('Reviewer10', [1])\n",
+    "\n",
+    "papers = []\n",
+    "\n",
+    "papers.append(Paper('Paper1', [r1, r2], 2))\n",
+    "papers.append(Paper('Paper2', [r1], 0))\n",
+    "papers.append(Paper('Paper3', [r3, r4], 0))\n",
+    "papers.append(Paper('Paper4', [r5, r6], -2))\n",
+    "papers.append(Paper('Paper5', [r7, r8], 0))\n",
+    "papers.append(Paper('Paper6', [r9, r10], -1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "56c78a45",
+   "metadata": {},
+   "source": [
+    "## The Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "daafd13a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "max_time_slots = 2\n",
+    "max_slot_size = 2\n",
+    "\n",
+    "all_time_slots = [tuple(c) for c in pulp.allcombinations(papers, max_slot_size)]\n",
+    "possible_time_slots = []\n",
+    "\n",
+    "for ts in all_time_slots:\n",
+    "    if check(ts):\n",
+    "        possible_time_slots.append(ts)\n",
+    "\n",
+    "x = pulp.LpVariable.dicts(\n",
+    "    \"time_slot\", possible_time_slots, lowBound=0, upBound=1, cat=pulp.LpInteger\n",
+    ")\n",
+    "\n",
+    "review_model = pulp.LpProblem(\"Review Model\", pulp.LpMaximize)\n",
+    "review_model += pulp.lpSum([score(ts) * x[ts] for ts in possible_time_slots])\n",
+    "\n",
+    "review_model += (\n",
+    "    pulp.lpSum([x[ts] for ts in possible_time_slots]) <= max_time_slots,\n",
+    "    \"Maximum_number_of_time_slots\",\n",
+    ")\n",
+    "\n",
+    "for paper in papers:\n",
+    "    review_model += (\n",
+    "        pulp.lpSum([x[ts] for ts in possible_time_slots if paper in ts]) <= 1,\n",
+    "        f\"Must_assign_{paper}\",\n",
+    "    )\n",
+    "\n",
+    "status = review_model.solve()\n",
+    "\n",
+    "if status == 1:\n",
+    "    slot_count = 1\n",
+    "\n",
+    "    print(f\"The best time slots are:\")\n",
+    "    for ts in possible_time_slots:\n",
+    "        if x[ts].value() == 1.0:\n",
+    "            print(f'\\nSlot {slot_count}:')\n",
+    "\n",
+    "            for p in ts:\n",
+    "                print(f'\\n{p}')\n",
+    "\n",
+    "            slot_count += 1\n",
+    "\n",
+    "else:\n",
+    "    print('Problem could not be solved!')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "856d62e2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
+%% Cell type:markdown id:df593963 tags:
+# Optimization Example
+This notebook shows an example of how to use linear programming to solve the following optimization problem.
+## Problem Description
+TBD
+## General approach
+1. Create all possible combinations of papers given the maximum number of papers for a time slot.
+2. Remove combinations which are impossible to satisfy because authors have conflicting time slot preferences.
+3. Add a constraint to limit the number of time slots.
+4. Add a constraint to make sure that a paper can only appear in up to one time slot.
+5. Solve
+%% Cell type:markdown id:0e2d98ee tags:
+## Dependencies
+%% Cell type:code id:5a732db1 tags:
+``` 
+!pip install pulp
+```
+%% Cell type:code id:64604da8 tags:
+``` 
+import pulp
+from dataclasses import dataclass, field
+from typing import List
+```
+%% Cell type:markdown id:97cdf625 tags:
+## Data structures
+%% Cell type:code id:7036ad2e tags:
+``` 
+@dataclass
+class Reviewer:
+    id: str
+    time_slots: List[int]
+    def __repr__(self) -> str:
+        return self.id
+```
+%% Cell type:code id:e8352c4f tags:
+``` 
+@dataclass(eq=True, frozen=True)
+class Paper:
+    id: str = field(hash=True)
+    reviewers: List[Reviewer] = field(compare=False, hash=False)
+    score: int = field(compare=False, hash=False)
+    def __repr__(self) -> str:
+        result = f'{self.id} (score: {self.score})'
+        for rev in self.reviewers:
+            result += f'\n{rev}'
+        return result
+```
+%% Cell type:markdown id:405bf280 tags:
+## Functions
+%% Cell type:code id:8b90cf13 tags:
+``` 
+def score(time_slot) -> int:
+    '''
+    Takes a time slot (i.e. a list of papers) and calculates a score.
+    The scores takes the number of papers in the time slot and the review score into account.
+    '''
+    paper_count = len(time_slot)
+    if paper_count == 1:
+        if len(time_slot[0].reviewers) > 0:
+            review_score = 2 - abs(time_slot[0].score)
+            return paper_count + review_score
+        else:
+            return 0
+    result = paper_count
+    for p in time_slot:
+        review_score = 2 - abs(p.score)
+        if len(p.reviewers) == 1:
+            result += review_score
+        elif len(p.reviewers) > 1:
+            result += review_score
+            # for multiple reviewers, we could also take into account the number of reviewers or how many
+            # reviewers are available at the same time, but currently this is not used
+            # all_ts_lists = []
+            # for rev in p.reviewers:
+            #     if len(rev.time_slots) > 0:
+            #         all_ts_lists.append(rev.time_slots)
+            # inters = set.intersection(*[set(x) for x in all_ts_lists])
+            # if len(inters) > 0:
+            #     result += review_score
+    return result
+```
+%% Cell type:code id:dcd2472b tags:
+``` 
+def check(time_slot) -> bool:
+    '''
+    Takes a time slot (i.e. a list of papers) and checks if there are any contradictions, i.e., for every paper
+    with at least one reviewer, there is a time slot where at least one reviewer for every paper is available.
+    '''
+    all_ts_lists = []
+    for p in time_slot:
+        all_p_ts = []
+        for rev in p.reviewers:
+            if len(rev.time_slots) > 0:
+                all_p_ts.extend(rev.time_slots)
+        all_ts_lists.append(all_p_ts)
+    if len(all_ts_lists) > 0:
+        result = len(set.intersection(*[set(x) for x in all_ts_lists]))
+        return result >= 1
+    else:
+        return True
+```
+%% Cell type:markdown id:a09dfb77 tags:
+## Test data
+%% Cell type:code id:61224383 tags:
+``` 
+r1 = Reviewer('Reviewer1', [0, 1])
+r2 = Reviewer('Reviewer2', [0, 1])
+r3 = Reviewer('Reviewer3', [0])
+r4 = Reviewer('Reviewer4', [1])
+r5 = Reviewer('Reviewer5', [2])
+r6 = Reviewer('Reviewer6', [2])
+r7 = Reviewer('Reviewer7', [2])
+r8 = Reviewer('Reviewer8', [2])
+r9 = Reviewer('Reviewer9', [0])
+r10 = Reviewer('Reviewer10', [1])
+papers = []
+papers.append(Paper('Paper1', [r1, r2], 2))
+papers.append(Paper('Paper2', [r1], 0))
+papers.append(Paper('Paper3', [r3, r4], 0))
+papers.append(Paper('Paper4', [r5, r6], -2))
+papers.append(Paper('Paper5', [r7, r8], 0))
+papers.append(Paper('Paper6', [r9, r10], -1))
+```
+%% Cell type:markdown id:56c78a45 tags:
+## The Model
+%% Cell type:code id:daafd13a tags:
+``` 
+max_time_slots = 2
+max_slot_size = 2
+all_time_slots = [tuple(c) for c in pulp.allcombinations(papers, max_slot_size)]
+possible_time_slots = []
+for ts in all_time_slots:
+    if check(ts):
+        possible_time_slots.append(ts)
+x = pulp.LpVariable.dicts(
+    "time_slot", possible_time_slots, lowBound=0, upBound=1, cat=pulp.LpInteger
+)
+review_model = pulp.LpProblem("Review Model", pulp.LpMaximize)
+review_model += pulp.lpSum([score(ts) * x[ts] for ts in possible_time_slots])
+review_model += (
+    pulp.lpSum([x[ts] for ts in possible_time_slots]) <= max_time_slots,
+    "Maximum_number_of_time_slots",
+)
+for paper in papers:
+    review_model += (
+        pulp.lpSum([x[ts] for ts in possible_time_slots if paper in ts]) <= 1,
+        f"Must_assign_{paper}",
+    )
+status = review_model.solve()
+if status == 1:
+    slot_count = 1
+    print(f"The best time slots are:")
+    for ts in possible_time_slots:
+        if x[ts].value() == 1.0:
+            print(f'\nSlot {slot_count}:')
+            for p in ts:
+                print(f'\n{p}')
+            slot_count += 1
+else:
+    print('Problem could not be solved!')
+```
+%% Cell type:code id:856d62e2 tags:
+``` 
+```