Skip to content
Snippets Groups Projects
Commit f6a3ac82 authored by Frederik Arnold's avatar Frederik Arnold
Browse files

Refactor

parent 024a30c1
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id:4e9207f1 tags: %% Cell type:markdown id:4e9207f1 tags:
# Optimization Example # Optimization Example
This notebook shows an example of how to use linear programming to solve the following optimization problem.
## Problem Description ## Problem Description
Given a set of papers, a set of reviewers who are assigned to the papers, and a set of time slots from which the reviewers selected their availability, we want to assign the papers to time slots in such a way that the optimal number of papers can be discussed with the reviewers. Papers with disagreeing reviews, i.e. papers whose score is close to 0, should be assigned with higher priority.
## General approach ## General approach
In this notebook, we present two approaches. The first one does not scale very well and gets very slow with growing datasets. The second one scales well. In this notebook, we present two approaches. The first one does not scale very well and gets very slow with growing datasets. The second one scales well.
### Slow approach ### Slow approach
1. Create all possible combinations of papers given the maximum number of papers for a time slot. 1. Create all possible combinations of papers given the maximum number of papers for a time slot.
2. Remove combinations which are impossible to satisfy because reviewers have conflicting time slot preferences.
3. Add a constraint to limit the number of time slots.
4. Add a constraint to make sure that a paper can only appear in up to one time slot. 4. Add a constraint to make sure that a paper can only appear in up to one time slot.
5. Solve. 5. Solve.
### Fast approach ### Fast approach
1. Create a binary variable for every combination of paper and time slot. This variable indicates whether a paper is assigned to a time slot. 1. Create a binary variable for every combination of paper and time slot. This variable indicates whether a paper is assigned to a time slot.
2. Fix variables which should never be True to False. This is the case if there are time slot preferences but none match the given slot. 2. Fix variables which should never be True to False. This is the case if there are time slot preferences but none match the given slot.
3. Add a constraint to make sure that a paper can only appear in up to one time slot. 3. Add a constraint to make sure that a paper can only appear in up to one time slot.
4. Add a constraint to limit the slot size. 4. Add a constraint to limit the slot size.
5. Solve 5. Solve
%% Cell type:markdown id:d43e2d19 tags: %% Cell type:markdown id:d43e2d19 tags:
## Dependencies ## Dependencies
%% Cell type:code id:fa369056 tags: %% Cell type:code id:fa369056 tags:
``` ```
!pip install pulp !pip install pulp
``` ```
%% Cell type:code id:7d617f38 tags: %% Cell type:code id:7d617f38 tags:
``` ```
import csv
from dataclasses import dataclass, field
from typing import List

import pulp
``` ```
%% Cell type:markdown id:27e8cff1 tags: %% Cell type:markdown id:27e8cff1 tags:
## Data structures ## Data structures
%% Cell type:code id:7e971701 tags: %% Cell type:code id:7e971701 tags:
``` ```
@dataclass
class Reviewer:
    '''
    A reviewer together with the time slots they marked as available.
    '''

    # Identifier of the reviewer.
    id: str
    # Indices of the time slots the reviewer selected.
    time_slots: List[int]

    def __str__(self) -> str:
        '''
        Returns the reviewer id followed by the selected time slots in parentheses.
        '''
        return f'{self.id} ({self.time_slots})'

    def __repr__(self) -> str:
        '''
        Returns only the id, which keeps containers of reviewers compact when printed.
        '''
        return self.id
``` ```
%% Cell type:code id:846517b8 tags: %% Cell type:code id:846517b8 tags:
``` ```
@dataclass(eq=True, frozen=True)
class Paper:
    '''
    A paper with its assigned reviewers and its review score.

    Equality and hashing only take the id into account (the other fields are declared with
    compare=False / hash=False), so papers can be used in sets and as dictionary keys.
    The dataclass is frozen, i.e. attributes cannot be re-assigned, but the reviewer list itself
    is still mutated in place by add_reviewer.
    '''

    # Identifier of the paper; the only field used for equality and hashing.
    id: str = field(hash=True)
    # Reviewers assigned to the paper. The forward reference keeps this class definable
    # independently of where Reviewer is defined.
    reviewers: List['Reviewer'] = field(compare=False, hash=False)
    # Review score of the paper.
    score: float = field(compare=False, hash=False)

    def add_reviewer(self, reviewer):
        '''
        Appends a reviewer to the list of reviewers of this paper.
        '''
        self.reviewers.append(reviewer)

    def __str__(self) -> str:
        '''
        Returns the id and score on the first line, followed by one line per reviewer.
        '''
        header = f'{self.id} (score: {self.score})'
        return '\n'.join([header, *(str(rev) for rev in self.reviewers)])

    def __repr__(self) -> str:
        '''
        Returns only the id, which keeps containers of papers compact when printed.
        '''
        return self.id
``` ```
%% Cell type:markdown id:0dc4485a tags: %% Cell type:markdown id:0dc4485a tags:
## Load data ## Load data
%% Cell type:code id:c1c7b757 tags: %% Cell type:code id:c1c7b757 tags:
``` ```
def load_data(file_path):
    '''
    Reads a tab-separated file and returns the list of papers with their reviewers.

    Only rows with exactly 7 columns are used; all other rows are skipped. The columns are read
    as follows: 0 = reviewer id, 1 = paper id, 3 = score, 4-6 = availability flags for the time
    slots 0, 1 and 2 (a flag of '1' means the reviewer selected that slot). Column 2 is not used.

    Every row adds one reviewer to its paper. The score of a paper is taken from the first row
    in which the paper appears. Papers are returned in order of first appearance.

    Raises a ValueError if the score column of a used row cannot be converted to float.
    '''
    # Index by paper id so that finding an already seen paper is a dict lookup instead of a
    # scan over all papers for every row. Dicts keep insertion order, so the result order is
    # the order of first appearance.
    papers_by_id = {}

    # newline='' is the way the csv module documentation recommends opening files for a reader.
    with open(file_path, 'r', newline='') as file:
        for row in csv.reader(file, delimiter='\t'):
            if len(row) != 7:
                continue

            reviewer_id, paper_id = row[0], row[1]
            score = float(row[3])
            time_slots = [slot for slot, flag in enumerate(row[4:7]) if flag == '1']
            reviewer = Reviewer(reviewer_id, time_slots)

            paper = papers_by_id.get(paper_id)
            if paper is None:
                papers_by_id[paper_id] = Paper(paper_id, [reviewer], score)
            else:
                paper.add_reviewer(reviewer)

    return list(papers_by_id.values())
papers = load_data('../data/optimization_data.tsv')
``` ```
%% Cell type:markdown id:36d5ab0d tags: %% Cell type:markdown id:b96637c3 tags:
## The Models
%% Cell type:markdown id:537d7040 tags:
### Slow approach
## Functions <div class="alert alert-warning">
%% Cell type:code id:d737af19 tags: **Warning**: Depending on the slot size and the number of papers, this approach gets very slow very fast. It is recommended to limit the number of papers and `max_slot_size`.
</div>
%% Cell type:code id:ea63212b tags:
``` ```
def score_time_slot(time_slot) -> float:
    '''
    Takes a time slot (i.e. a sequence of papers) and calculates a score.
    The score takes the number of papers in the time slot and the review score into account.

    Every paper counts 1. In addition, every paper with at least one reviewer adds
    2 - abs(score), so papers with a score close to 0 contribute the most.
    A time slot which only consists of a single paper without reviewers is scored 0.
    '''
    paper_count = len(time_slot)

    if paper_count == 1 and not time_slot[0].reviewers:
        return 0

    # For multiple reviewers, we could also take into account the number of reviewers or how
    # many reviewers are available at the same time, but currently this is not used.
    # The sum starts at paper_count so that the values are added up in the same order as before.
    return sum((2 - abs(p.score) for p in time_slot if p.reviewers), paper_count)
``` ```
%% Cell type:code id:067ea4ea tags: %% Cell type:code id:05dd3bc9 tags:
```
def score_paper(paper) -> float:
    '''
    Calculates the score of a single paper.

    A paper with at least one reviewer is scored 2 - abs(score), so papers with a score close
    to 0 get the highest value. A paper without reviewers is scored 0.
    '''
    if not paper.reviewers:
        return 0

    return 2 - abs(paper.score)
```
%% Cell type:code id:23670286 tags:
``` ```
def check(time_slot) -> bool:
    '''
    Takes a time slot (i.e. a sequence of papers) and checks if there are any contradictions, i.e., for
    every paper with time slot preferences, there is a common time slot where at least one reviewer of
    every such paper is available.

    Papers without reviewers, or whose reviewers did not select any time slot, do not restrict the
    result. A time slot without any restricting paper is always possible.
    '''
    common = None

    for paper in time_slot:
        available = {ts for rev in paper.reviewers for ts in rev.time_slots}

        # No preferences means the paper can be discussed at any time, so it must not
        # empty the intersection.
        if not available:
            continue

        common = available if common is None else common & available

        if not common:
            return False

    return True
``` ```
%% Cell type:markdown id:b96637c3 tags:
## The Models
%% Cell type:markdown id:dd6a6099 tags:
### Slow approach
<div class="alert alert-warning">
**Warning**: Depending on the slot size and the number of papers, this approach gets very slow very fast. It is recommended to limit the number of papers and `max_slot_size`.
</div>
%% Cell type:code id:69c85176 tags: %% Cell type:code id:69c85176 tags:
``` ```
papers = load_data('../data/optimization_data.tsv')

# use the first 15 papers
papers = papers[:15]

max_time_slots = 3
max_slot_size = 5

# Every combination of up to max_slot_size papers is a candidate time slot. Only candidates
# without contradicting time slot preferences are kept.
all_time_slots = list(pulp.allcombinations(papers, max_slot_size))
possible_time_slots = [ts for ts in all_time_slots if check(ts)]

# One 0/1 variable per candidate: 1 means the candidate is used as a time slot.
x = pulp.LpVariable.dicts(
    "time_slot", possible_time_slots, lowBound=0, upBound=1, cat=pulp.LpInteger
)

review_model = pulp.LpProblem("Review-Model", pulp.LpMaximize)

# Objective: maximize the summed score of the selected candidates.
review_model += pulp.lpSum([score_time_slot(ts) * x[ts] for ts in possible_time_slots])

review_model += (
    pulp.lpSum([x[ts] for ts in possible_time_slots]) <= max_time_slots,
    "Maximum_number_of_time_slots",
)

# A paper can appear in at most one selected candidate. The constraint is named after the
# paper id, because str(paper) spans several lines and contains spaces.
for paper in papers:
    review_model += (
        pulp.lpSum([x[ts] for ts in possible_time_slots if paper in ts]) <= 1,
        f"Must_assign_{paper.id}",
    )

status = review_model.solve()

if status == pulp.LpStatusOptimal:
    slot_count = 1
    assigned_papers = []
    print("The best time slots are:")

    for ts in possible_time_slots:
        # Solver values are floats, so compare against 0.5 instead of testing for exactly 1.0.
        if (x[ts].value() or 0) > 0.5:
            print(f'\nSlot {slot_count}:')
            for p in ts:
                print(f'\n{p}')
            assigned_papers.extend(ts)
            slot_count += 1

    diff = set(papers) - set(assigned_papers)

    if len(diff) > 0:
        print('\nThe following papers were not assigned to a time slot:')
        for p in diff:
            print(f'\n{p}')
else:
    print('Problem could not be solved!')
``` ```
%% Cell type:markdown id:0918db10 tags: %% Cell type:markdown id:7d5e9686 tags:
### Fast approach ### Fast approach
<div class="alert alert-info"> <div class="alert alert-info">
No limitations are needed for this approach. No limitations are needed for this approach.
</div> </div>
%% Cell type:code id:501af564 tags: %% Cell type:code id:9a065c01 tags:
``` ```
def score_paper(paper) -> float:
    '''
    Calculates the score of a single paper.

    A paper with at least one reviewer is scored 2 - abs(score), so papers with a score close
    to 0 get the highest value. A paper without reviewers is scored 0.
    '''
    if not paper.reviewers:
        return 0

    return 2 - abs(paper.score)
```
%% Cell type:code id:fea427b4 tags:
```
papers = load_data('../data/optimization_data.tsv')

max_time_slots = 3
max_slot_size = 50

# One 0/1 variable per (time slot, paper) pair: 1 means the paper is assigned to the time slot.
x = pulp.LpVariable.dicts(
    "p_2_s", [(i, j) for i in range(0, max_time_slots) for j in papers], lowBound=0, upBound=1, cat=pulp.LpInteger
)

# Fix variables to 0 if the reviewers of the paper have time slot preferences
# but none of them matches the time slot of the variable.
for (slot, slot_paper), var in x.items():
    paper_time_slots = set()
    for reviewer in slot_paper.reviewers:
        paper_time_slots.update(reviewer.time_slots)

    if paper_time_slots and slot not in paper_time_slots:
        var.setInitialValue(0)
        var.fixValue()

review_model = pulp.LpProblem("Review-Model", pulp.LpMaximize)

# Objective: every assigned paper counts 1 plus its paper score.
review_model += pulp.lpSum(
    [(score_paper(j) + 1) * x[i, j] for i in range(0, max_time_slots) for j in papers]
)

# A paper can be assigned to at most one time slot.
for paper in papers:
    review_model += (
        pulp.lpSum([x[i, paper] for i in range(0, max_time_slots)]) <= 1,
        f"Must_assign_{paper.id}",
    )

# A time slot can hold at most max_slot_size papers.
for i in range(0, max_time_slots):
    review_model += (
        pulp.lpSum([x[i, p] for p in papers]) <= max_slot_size,
        f"Max_slot_size_{i}",
    )

status = review_model.solve()

if status == pulp.LpStatusOptimal:
    print("The best time slots are:")
    assigned_papers = []

    for i in range(0, max_time_slots):
        print(f'\nSlot {i+1}:')
        for p in papers:
            # Solver values are floats, so compare against 0.5 instead of testing for exactly 1.0.
            if (x[i, p].value() or 0) > 0.5:
                assigned_papers.append(p)
                print(f'\n{p}')

    diff = set(papers) - set(assigned_papers)

    if len(diff) > 0:
        print('\nThe following papers were not assigned to a time slot:')
        for p in diff:
            print(f'\n{p}')
else:
    print('Problem could not be solved!')
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment