Skip to content
Snippets Groups Projects
Commit ccd622a9 authored by Frederik Arnold
Browse files

Update similarity training

parent 77e2adc4
No related branches found
No related tags found
No related merge requests found
......@@ -36,9 +36,6 @@ def main():
with open(join(input_path, 'train_set.tsv'), 'r') as train_file:
reader = csv.reader(train_file, delimiter='\t')
# skip first row (header)
# next(reader, None)
for row in reader:
ie = InputExample(texts=[row[0], row[1], row[2]])
train_examples.append(ie)
......@@ -49,35 +46,21 @@ def main():
with open(join(input_path, 'val_set.tsv'), 'r') as train_file:
reader = csv.reader(train_file, delimiter='\t')
# skip first row (header)
# next(reader, None)
for row in reader:
val_anchor.append(row[0])
val_positive.append(row[1])
val_negative.append(row[2])
# model = SentenceTransformer('deutsche-telekom/gbert-large-paraphrase-cosine')
model = SentenceTransformer(model_name)
# Define your train examples. You need more than just two examples...
# train_examples = [InputExample(texts=['My first sentence', 'My second sentence'], label=0.8),
# InputExample(texts=['Another pair', 'Unrelated sentence'], label=0.3)]
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=8)
# train_loss = losses.CosineSimilarityLoss(model)
train_loss = losses.TripletLoss(model=model)
# train_loss = losses.BatchHardSoftMarginTripletLoss(
# model=model,
# distance_metric=BatchHardTripletLossDistanceFunction.eucledian_distance,
# )
# evaluator = evaluation.EmbeddingSimilarityEvaluator(val_anchor, sen, scores)
# evaluator = evaluation.TripletEvaluator(val_anchor, val_positive, val_negative)
evaluator = evaluation.TripletEvaluator(val_anchor, val_positive, val_negative)
# Tune the model
model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=3, warmup_steps=100, output_path=output_path)
model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=3, warmup_steps=100,
evaluator=evaluator, evaluation_steps=10000, output_path=output_path)
if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment