Commit e4786a46 authored by Konstantin Schulz's avatar Konstantin Schulz

added more tags, a version field and search bar for zenodo records

parent e952e195
Pipeline #16521 passed with stages
in 4 minutes and 49 seconds
......@@ -32,8 +32,10 @@ Or combine both commands in one line: `pip list -o --format=freeze | grep -v '^\
----------------------------------------------------------------
# Database
To use the database for the first time:
If you use Postgres, you need to create the database "callidus" manually: `CREATE DATABASE callidus;`
To autogenerate a new migration script:
1. Start the Docker container with the database: `docker-compose run -p 5432:5432 -d db`
1. Start the Docker container with the database: `docker-compose up -p 5432:5432 -d db`
2. Create a new migration: `flask db migrate`.
3. Perform a migration...
- ... to a newer version: `flask db upgrade`.
......
"""The main module for the application. It contains the application factory and provides access to the database."""
import logging
import os
import pickle
import sys
import uuid
from logging.handlers import RotatingFileHandler
......@@ -57,7 +58,6 @@ def create_app(cfg: Type[Config] = Config) -> Flask:
app.register_blueprint(services_bp)
from mcserver.app.api import bp as api_bp
app.register_blueprint(api_bp)
init_logging(app, Config.LOG_PATH_MCSERVER)
return app
......@@ -79,36 +79,52 @@ def full_init(app: Flask, cfg: Type[Config] = Config) -> None:
start_updater(app)
def get_api_specification() -> dict:
    """Read, parse and cache the OpenAPI specification including all shared models.

    The resolved specification is pickled to ``Config.API_SPEC_CACHE_PATH`` together with a
    fingerprint built from the modification times of the API file and the shared models file.
    The cached specification is reused when the stored fingerprint equals the current one;
    otherwise the specification is parsed again and the cache file is rewritten.

    :return: The resolved OpenAPI specification.
    """
    parser: prance.ResolvingParser = prance.ResolvingParser(
        Config.API_SPEC_MCSERVER_FILE_PATH, lazy=True, strict=False)
    api_last_modified_time: float = os.path.getmtime(Config.API_SPEC_MCSERVER_FILE_PATH)
    models_last_modified_time: float = os.path.getmtime(Config.API_SPEC_MODELS_YAML_FILE_PATH)
    # the sum of both timestamps is the fingerprint that is compared against the cached one
    lmt: float = api_last_modified_time + models_last_modified_time
    if os.path.exists(Config.API_SPEC_CACHE_PATH):
        try:
            # NOTE: pickle is only acceptable here because the cache file is written by this function itself;
            # never point API_SPEC_CACHE_PATH at data from an untrusted source
            with open(Config.API_SPEC_CACHE_PATH, "rb") as cache_file:
                cache: dict = pickle.load(cache_file)
            if cache["lmt"] == lmt:
                parser.specification = cache["spec"]
        except (pickle.UnpicklingError, EOFError, KeyError, TypeError):
            # a truncated or malformed cache file must not prevent start-up: fall through and parse again
            pass
    if not parser.specification:
        parser.parse()
        # the context manager guarantees that the cache is flushed and the handle is closed
        with open(Config.API_SPEC_CACHE_PATH, "wb+") as cache_file:
            pickle.dump(dict(lmt=lmt, spec=parser.specification), cache_file)
    return parser.specification
def init_app_common(cfg: Type[Config] = Config) -> Flask:
""" Initializes common Flask parts, e.g. CORS, configuration, database, migrations and custom corpora."""
# build the Connexion wrapper; the underlying Flask application is taken from it further down
connexion_app: FlaskApp = connexion.FlaskApp(
__name__, port=cfg.HOST_PORT, specification_dir=Config.MC_SERVER_DIRECTORY)
# NOTE(review): the specification is parsed and registered directly here and then registered a second time
# via get_api_specification() below; one of the two registrations looks like a leftover - confirm
parser = prance.ResolvingParser(Config.API_SPEC_MCSERVER_FILE_PATH, lazy=True, strict=False)
parser.parse()
connexion_app.add_api(parser.specification)
spec: dict = get_api_specification()
connexion_app.add_api(spec)
apply_event_handlers(connexion_app)
app: Flask = connexion_app.app
# allow CORS requests for all API routes
CORS(app) # , resources=r"/*"
app.config.from_object(cfg)
# push an application context; presumably the database setup below relies on it - confirm
app.app_context().push()
init_logging(app, Config.LOG_PATH_MCSERVER)
# bind the database and the migration extension to this application
db.init_app(app)
migrate.init_app(app, db)
db.create_all()
# imported inside the function rather than at module level; presumably to avoid a circular import - confirm
from mcserver.app.services.databaseService import DatabaseService
# NOTE(review): init_db_alembic is called once without arguments and once with `app`;
# check both calls against the current signature of DatabaseService.init_db_alembic
DatabaseService.init_db_alembic()
if not cfg.TESTING:
DatabaseService.init_db_alembic(app)
from mcserver.app.services.textService import TextService
TextService.init_proper_nouns_list()
TextService.init_stop_words_latin()
# NOTE(review): two consecutive TESTING checks, one reading Config and one reading the cfg argument
if not Config.TESTING:
if not cfg.TESTING:
full_init(app, cfg)
return app
def init_logging(app: Flask, log_file_path: str):
""" Initializes logging for a given Flask application. """
file_handler: RotatingFileHandler = RotatingFileHandler(log_file_path, maxBytes=1000 * 1000,
backupCount=3)
file_handler: RotatingFileHandler = RotatingFileHandler(log_file_path, maxBytes=1000 * 1000, backupCount=3)
log_level: int = logging.INFO
file_handler.setLevel(log_level)
app.logger.addHandler(file_handler)
......@@ -147,7 +163,7 @@ def shutdown_session(exception=None):
db: SQLAlchemy = create_database()
migrate: Migrate = Migrate(directory=Config.MIGRATIONS_DIRECTORY)
if not hasattr(open_alchemy.models, Config.DATABASE_TABLE_CORPUS):
# do this _BEFORE_ you add any APIs to your application
# initialize the database and models _BEFORE_ you add any APIs to your application
init_yaml(Config.API_SPEC_MODELS_YAML_FILE_PATH, base=db.Model,
models_filename=os.path.join(Config.MC_SERVER_DIRECTORY, "models_auto.py"))
......
......@@ -15,15 +15,18 @@ def get():
record_properties: Set[str] = set([x[0] for x in record_members if type(x[1]) == property])
author_property: str = "Autor:"
work_property: str = "Werk:"
version_property: str = "Version:"
sickle: Sickle = Sickle(Config.ZENODO_API_URL)
records: List[Record] = list(sickle.ListRecords(metadataPrefix='oai_dc', set=Config.ZENODO_SET))
zenodo_records: List[ZenodoRecord] = []
for record in records:
md: dict = record.metadata
params: Dict[str, Any] = {rp: md.get(rp, [None])[0] for rp in record_properties}
params: Dict[str, Any] = {rp: md.get(rp, []) for rp in record_properties}
new_record: ZenodoRecord = ZenodoRecord(**params)
tags: List[str] = md.get("subject", [])
new_record.author = next((x.split(author_property)[-1] for x in tags if x.startswith(author_property)), "")
new_record.work = next((x.split(work_property)[-1] for x in tags if x.startswith(work_property)), "")
new_record.author = [next((x.split(author_property)[-1] for x in tags if x.startswith(author_property)), "")]
new_record.work = [next((x.split(work_property)[-1] for x in tags if x.startswith(work_property)), "")]
new_record.version = \
[next((x.split(version_property)[-1] for x in tags if x.startswith(version_property)), "1.0")]
zenodo_records.append(new_record)
return NetworkService.make_json_response([x.to_dict() for x in zenodo_records])
# Ignore everything in this directory
*.json
*.*
# Except this file
!.gitignore
\ No newline at end of file
import ntpath
import os
import pickle
from collections import OrderedDict
from typing import List, Tuple, Set, Dict
from typing import List, Set, Dict
import conllu
import rapidjson as json
from conllu import TokenList
......@@ -185,18 +186,16 @@ class CustomCorpusService:
cc: CustomCorpus = next(x for x in CustomCorpusService.custom_corpora if x.corpus.source_urn in urn)
annotations: List[TokenList] = []
file_name: str = ntpath.basename(cc.file_path)
cache_file_path: str = os.path.join(Config.TREEBANKS_CACHE_DIRECTORY, file_name + ".json")
cache_file_path: str = os.path.join(Config.TREEBANKS_CACHE_DIRECTORY, file_name + ".pickle")
if os.path.exists(cache_file_path):
try:
annotations = [TokenList(tokens=x["tokens"], metadata=x["metadata"]) for x in
json.loads(FileService.get_file_content(cache_file_path))]
annotations = pickle.load(open(cache_file_path, "rb"))
except ValueError:
pass
if not annotations:
annotations = conllu.parse(FileService.get_file_content(cc.file_path))
# need to cache the result because the syntax parser is so slow
with open(cache_file_path, "w+") as f:
f.write(json.dumps(dict(tokens=x.tokens, metadata=x.metadata) for x in annotations))
pickle.dump(annotations, open(cache_file_path, "wb+"))
if cc.corpus.source_urn != urn:
# the given URN points to a sub-graph, so we make a selection from our annotations
annotations = CustomCorpusService.get_treebank_sub_annotations(urn, annotations, cc)
......
from datetime import datetime
from typing import Union, Any
from flask_migrate import stamp, upgrade
from flask import Flask
import flask_migrate
from sqlalchemy.exc import OperationalError, InvalidRequestError
from sqlalchemy.orm import Query
from mcserver.app import db
......@@ -25,11 +27,13 @@ class DatabaseService:
return db.engine.dialect.has_table(db.engine, table)
@staticmethod
# NOTE(review): two signatures are visible here, one without parameters and one taking the Flask app;
# the body uses `app`, so callers presumably have to pass it - confirm
def init_db_alembic() -> None:
def init_db_alembic(app: Flask) -> None:
"""In Docker, the alembic version is not initially written to the database, so we need to set it manually."""
# only stamp the database when the alembic table is missing
if not DatabaseService.has_table(Config.DATABASE_TABLE_ALEMBIC):
# NOTE(review): stamp/upgrade appear both as bare names and as flask_migrate.* calls - confirm which pair is live
stamp(directory=Config.MIGRATIONS_DIRECTORY)
upgrade(directory=Config.MIGRATIONS_DIRECTORY)
flask_migrate.stamp(directory=Config.MIGRATIONS_DIRECTORY)
flask_migrate.upgrade(directory=Config.MIGRATIONS_DIRECTORY)
# NOTE(review): the message below ends without a value and the result of current() is not captured,
# so the revision ID is not part of this log record
app.logger.info("Database revision ID is ")
flask_migrate.current(directory=Config.MIGRATIONS_DIRECTORY)
@staticmethod
def init_db_update_info() -> None:
......
......@@ -28,6 +28,7 @@ class Config(object):
# dirty hack to get the app working either with the Gunicorn/Flask CLI or the PyCharm debugger
MC_SERVER_APP_DIRECTORY = os.path.join(MC_SERVER_DIRECTORY, "app") if os.path.split(
MC_SERVER_DIRECTORY)[-1] == "mcserver" else MC_SERVER_DIRECTORY
CACHE_DIRECTORY = os.path.join(MC_SERVER_APP_DIRECTORY, "cache")
IS_DOCKER = os.environ.get("IS_THIS_A_DOCKER_CONTAINER", False)
MC_FRONTEND_DIRECTORY = os.path.join(Path(MC_SERVER_DIRECTORY).parent.parent, "mc_frontend")
MC_FRONTEND_SRC_DIRECTORY = os.path.join(MC_FRONTEND_DIRECTORY, "src")
......@@ -38,13 +39,13 @@ class Config(object):
TREEBANKS_PATH = os.path.join(ASSETS_DIRECTORY, "treebanks")
TREEBANKS_PROIEL_PATH = os.path.join(TREEBANKS_PATH, "proiel")
API_SPEC_CACHE_PATH = os.path.join(CACHE_DIRECTORY, "openapi_spec.pickle")
API_SPEC_MCSERVER_FILE_PATH = os.path.join(MC_SERVER_DIRECTORY, "mcserver_api.yaml")
API_SPEC_MODELS_YAML_FILE_PATH = os.path.join(Path(MC_SERVER_DIRECTORY).parent, "openapi_models.yaml")
AQL_CASE = "/.*Case=.*/"
AQL_DEP = "->dep"
AQL_DEPREL = "deprel"
AQL_TOK = "tok"
CACHE_DIRECTORY = os.path.join(MC_SERVER_APP_DIRECTORY, "cache")
CONLLU2SVG_PATH_LINUX = os.path.join(ASSETS_DIRECTORY, "conllu2svg_linux64")
CONLLU2SVG_PATH_OSX = os.path.join(ASSETS_DIRECTORY, "conllu2svg_osx")
CORPUS_STORAGE_MANAGER: CorpusStorageManager = None
......
......@@ -352,13 +352,11 @@ def mcserver_app_api_vocabulary_api_post(frequency_upper_bound, query_urn, vocab
return 'do some magic!'
def mcserver_app_api_zenodo_api_get(last_update_time): # noqa: E501
def mcserver_app_api_zenodo_api_get(): # noqa: E501
"""Shows which exercises are available on Zenodo.
# noqa: E501
:param last_update_time: Time (in milliseconds) of the last update.
:type last_update_time: int
:rtype: List[ZenodoRecord]
"""
......
......@@ -15,38 +15,41 @@ class ZenodoRecord(Model):
Do not edit the class manually.
"""
def __init__(self, author=None, contributor=None, creator=None, description=None, identifier=None, language=None, subject=None, title=None, work=None): # noqa: E501
def __init__(self, author=None, contributor=None, creator=None, description=None, identifier=None, language=None, subject=None, title=None, version=None, work=None): # noqa: E501
"""ZenodoRecord - a model defined in OpenAPI
:param author: The author of this ZenodoRecord. # noqa: E501
:type author: str
:param contributor: The contributor of this ZenodoRecord. # noqa: E501
:type contributor: str
:type contributor: List[str]
:param creator: The creator of this ZenodoRecord. # noqa: E501
:type creator: str
:type creator: List[str]
:param description: The description of this ZenodoRecord. # noqa: E501
:type description: str
:type description: List[str]
:param identifier: The identifier of this ZenodoRecord. # noqa: E501
:type identifier: str
:type identifier: List[str]
:param language: The language of this ZenodoRecord. # noqa: E501
:type language: str
:type language: List[str]
:param subject: The subject of this ZenodoRecord. # noqa: E501
:type subject: List[str]
:param title: The title of this ZenodoRecord. # noqa: E501
:type title: str
:type title: List[str]
:param version: The version of this ZenodoRecord. # noqa: E501
:type version: List[str]
:param work: The work of this ZenodoRecord. # noqa: E501
:type work: str
:type work: List[str]
"""
self.openapi_types = {
'author': str,
'contributor': str,
'creator': str,
'description': str,
'identifier': str,
'language': str,
'contributor': List[str],
'creator': List[str],
'description': List[str],
'identifier': List[str],
'language': List[str],
'subject': List[str],
'title': str,
'work': str
'title': List[str],
'version': List[str],
'work': List[str]
}
self.attribute_map = {
......@@ -58,6 +61,7 @@ class ZenodoRecord(Model):
'language': 'language',
'subject': 'subject',
'title': 'title',
'version': 'version',
'work': 'work'
}
......@@ -69,6 +73,7 @@ class ZenodoRecord(Model):
self._language = language
self._subject = subject
self._title = title
self._version = version
self._work = work
@classmethod
......@@ -109,10 +114,9 @@ class ZenodoRecord(Model):
def contributor(self):
"""Gets the contributor of this ZenodoRecord.
People that contributed something to the record. # noqa: E501
:return: The contributor of this ZenodoRecord.
:rtype: str
:rtype: List[str]
"""
return self._contributor
......@@ -120,10 +124,9 @@ class ZenodoRecord(Model):
def contributor(self, contributor):
"""Sets the contributor of this ZenodoRecord.
People that contributed something to the record. # noqa: E501
:param contributor: The contributor of this ZenodoRecord.
:type contributor: str
:type contributor: List[str]
"""
self._contributor = contributor
......@@ -132,10 +135,9 @@ class ZenodoRecord(Model):
def creator(self):
"""Gets the creator of this ZenodoRecord.
People that created the record. # noqa: E501
:return: The creator of this ZenodoRecord.
:rtype: str
:rtype: List[str]
"""
return self._creator
......@@ -143,10 +145,9 @@ class ZenodoRecord(Model):
def creator(self, creator):
"""Sets the creator of this ZenodoRecord.
People that created the record. # noqa: E501
:param creator: The creator of this ZenodoRecord.
:type creator: str
:type creator: List[str]
"""
self._creator = creator
......@@ -155,10 +156,9 @@ class ZenodoRecord(Model):
def description(self):
"""Gets the description of this ZenodoRecord.
Description of the record. # noqa: E501
:return: The description of this ZenodoRecord.
:rtype: str
:rtype: List[str]
"""
return self._description
......@@ -166,10 +166,9 @@ class ZenodoRecord(Model):
def description(self, description):
"""Sets the description of this ZenodoRecord.
Description of the record. # noqa: E501
:param description: The description of this ZenodoRecord.
:type description: str
:type description: List[str]
"""
self._description = description
......@@ -178,10 +177,9 @@ class ZenodoRecord(Model):
def identifier(self):
"""Gets the identifier of this ZenodoRecord.
Identifier for easy access to the record. # noqa: E501
:return: The identifier of this ZenodoRecord.
:rtype: str
:rtype: List[str]
"""
return self._identifier
......@@ -189,10 +187,9 @@ class ZenodoRecord(Model):
def identifier(self, identifier):
"""Sets the identifier of this ZenodoRecord.
Identifier for easy access to the record. # noqa: E501
:param identifier: The identifier of this ZenodoRecord.
:type identifier: str
:type identifier: List[str]
"""
self._identifier = identifier
......@@ -201,10 +198,9 @@ class ZenodoRecord(Model):
def language(self):
"""Gets the language of this ZenodoRecord.
Language of the materials in the record. # noqa: E501
:return: The language of this ZenodoRecord.
:rtype: str
:rtype: List[str]
"""
return self._language
......@@ -212,10 +208,9 @@ class ZenodoRecord(Model):
def language(self, language):
"""Sets the language of this ZenodoRecord.
Language of the materials in the record. # noqa: E501
:param language: The language of this ZenodoRecord.
:type language: str
:type language: List[str]
"""
self._language = language
......@@ -245,10 +240,9 @@ class ZenodoRecord(Model):
def title(self):
"""Gets the title of this ZenodoRecord.
Title of the record. # noqa: E501
:return: The title of this ZenodoRecord.
:rtype: str
:rtype: List[str]
"""
return self._title
......@@ -256,22 +250,41 @@ class ZenodoRecord(Model):
def title(self, title):
"""Sets the title of this ZenodoRecord.
Title of the record. # noqa: E501
:param title: The title of this ZenodoRecord.
:type title: str
:type title: List[str]
"""
self._title = title
@property
def version(self):
"""Gets the version of this ZenodoRecord.
:return: The version of this ZenodoRecord.
:rtype: List[str]
"""
return self._version
@version.setter
def version(self, version):
"""Sets the version of this ZenodoRecord.
:param version: The version of this ZenodoRecord.
:type version: List[str]
"""
self._version = version
@property
def work(self):
"""Gets the work of this ZenodoRecord.
Name of the work that serves as a basis or target for this record. # noqa: E501
:return: The work of this ZenodoRecord.
:rtype: str
:rtype: List[str]
"""
return self._work
......@@ -279,10 +292,9 @@ class ZenodoRecord(Model):
def work(self, work):
"""Sets the work of this ZenodoRecord.
Name of the work that serves as a basis or target for this record. # noqa: E501
:param work: The work of this ZenodoRecord.
:type work: str
:type work: List[str]
"""
self._work = work
......@@ -604,14 +604,6 @@ paths:
/zenodo:
get:
operationId: mcserver_app_api_zenodo_api_get
parameters:
- description: Time (in milliseconds) of the last update.
in: query
name: last_update_time
required: true
schema:
example: 123456789
type: integer
responses:
"200":
content:
......@@ -1447,18 +1439,36 @@ components:
ZenodoRecord:
description: Record with header and metadata from Zenodo.
example:
identifier: https://zenodo.org/record/4515301
creator: Glockemann, Brunhild
contributor: Forst, Alexandra
identifier:
- https://zenodo.org/record/4515301
- https://zenodo.org/record/4515301
creator:
- Glockemann, Brunhild
- Glockemann, Brunhild
contributor:
- Forst, Alexandra
- Forst, Alexandra
author: Caesar
subject:
- Latein
- Latein
work: Bellum civile II, 43-44
description: Die Materialien dieser Lerneinheit sollen Lateinstudierende bei
der Wortschatzarbeit unterstützen.
language: deu
title: Wortschatztraining zu Caesar, Bellum civile II, 43 bis 44
work:
- Bellum civile II, 43-44
- Bellum civile II, 43-44
description:
- Die Materialien dieser Lerneinheit sollen Lateinstudierende bei der Wortschatzarbeit
unterstützen.
- Die Materialien dieser Lerneinheit sollen Lateinstudierende bei der Wortschatzarbeit
unterstützen.
language:
- deu
- deu
title:
- Wortschatztraining zu Caesar, Bellum civile II, 43 bis 44
- Wortschatztraining zu Caesar, Bellum civile II, 43 bis 44
version:
- "2.0"
- "2.0"
properties:
author:
description: Author of the text that serves as a basis or target for this
......@@ -1466,26 +1476,36 @@ components:
example: Caesar
type: string
contributor:
description: People that contributed something to the record.
example: Forst, Alexandra
type: string
items:
description: People that contributed something to the record.
example: Forst, Alexandra
type: string
type: array
creator:
description: People that created the record.
example: Glockemann, Brunhild
type: string
items:
description: People that created the record.
example: Glockemann, Brunhild
type: string
type: array
description:
description: Description of the record.
example: Die Materialien dieser Lerneinheit sollen Lateinstudierende bei
der Wortschatzarbeit unterstützen.
type: string
items:
description: Description of the record.
example: Die Materialien dieser Lerneinheit sollen Lateinstudierende bei
der Wortschatzarbeit unterstützen.
type: string
type: array
identifier:
description: Identifier for easy access to the record.
example: https://zenodo.org/record/4515301
type: string
items:
description: Identifier for easy access to the record.
example: https://zenodo.org/record/4515301
type: string
type: array
language:
description: Language of the materials in the record.
example: deu
type: string
items:
description: Language of the materials in the record.
example: deu
type: string
type: array
subject:
items:
description: Tag that classifies the record.
......@@ -1493,14 +1513,24 @@ components:
type: string