Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
callidus
Machina Callida
Commits
35d4aa7a
Commit
35d4aa7a
authored
May 27, 2020
by
Konstantin Schulz
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added frequency analysis to the OpenAPI specification
parent
0863b577
Pipeline
#11727
failed with stages
in 2 minutes and 46 seconds
Changes
51
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
51 changed files
with
2350 additions
and
1584 deletions
+2350
-1584
mc_backend/csm/app/api/__init__.py
mc_backend/csm/app/api/__init__.py
+1
-2
mc_backend/csm/app/api/corpusStorageManagerAPI.py
mc_backend/csm/app/api/corpusStorageManagerAPI.py
+2
-1
mc_backend/csm/app/api/frequencyAPI.py
mc_backend/csm/app/api/frequencyAPI.py
+13
-26
mc_backend/csm/csm_api.yaml
mc_backend/csm/csm_api.yaml
+31
-0
mc_backend/mcserver/app/__init__.py
mc_backend/mcserver/app/__init__.py
+9
-4
mc_backend/mcserver/app/api/__init__.py
mc_backend/mcserver/app/api/__init__.py
+1
-7
mc_backend/mcserver/app/api/exerciseAPI.py
mc_backend/mcserver/app/api/exerciseAPI.py
+3
-2
mc_backend/mcserver/app/api/exerciseListAPI.py
mc_backend/mcserver/app/api/exerciseListAPI.py
+30
-49
mc_backend/mcserver/app/api/fileAPI.py
mc_backend/mcserver/app/api/fileAPI.py
+46
-67
mc_backend/mcserver/app/api/frequencyAPI.py
mc_backend/mcserver/app/api/frequencyAPI.py
+6
-20
mc_backend/mcserver/app/api/staticExercisesAPI.py
mc_backend/mcserver/app/api/staticExercisesAPI.py
+6
-3
mc_backend/mcserver/app/models.py
mc_backend/mcserver/app/models.py
+10
-47
mc_backend/mcserver/app/services/annotationService.py
mc_backend/mcserver/app/services/annotationService.py
+14
-14
mc_backend/mcserver/app/services/corpusService.py
mc_backend/mcserver/app/services/corpusService.py
+14
-13
mc_backend/mcserver/app/services/fileService.py
mc_backend/mcserver/app/services/fileService.py
+7
-6
mc_backend/mcserver/app/services/frequencyService.py
mc_backend/mcserver/app/services/frequencyService.py
+48
-41
mc_backend/mcserver/app/services/xmlService.py
mc_backend/mcserver/app/services/xmlService.py
+1
-1
mc_backend/mcserver/config.py
mc_backend/mcserver/config.py
+6
-5
mc_backend/mcserver/mcserver_api.yaml
mc_backend/mcserver/mcserver_api.yaml
+144
-581
mc_backend/mcserver/migrations/env.py
mc_backend/mcserver/migrations/env.py
+1
-1
mc_backend/mocks.py
mc_backend/mocks.py
+3
-3
mc_backend/openapi/openapi_server/controllers/default_controller.py
.../openapi/openapi_server/controllers/default_controller.py
+77
-1
mc_backend/openapi/openapi_server/models/__init__.py
mc_backend/openapi/openapi_server/models/__init__.py
+4
-2
mc_backend/openapi/openapi_server/models/file_type.py
mc_backend/openapi/openapi_server/models/file_type.py
+44
-0
mc_backend/openapi/openapi_server/models/frequency_item.py
mc_backend/openapi/openapi_server/models/frequency_item.py
+8
-6
mc_backend/openapi/openapi_server/models/inline_object.py
mc_backend/openapi/openapi_server/models/inline_object.py
+150
-0
mc_backend/openapi/openapi_server/models/inline_response200.py
...ckend/openapi/openapi_server/models/inline_response200.py
+24
-284
mc_backend/openapi/openapi_server/models/phenomenon.py
mc_backend/openapi/openapi_server/models/phenomenon.py
+44
-0
mc_backend/openapi/openapi_server/models/static_exercise.py
mc_backend/openapi/openapi_server/models/static_exercise.py
+94
-0
mc_backend/openapi/openapi_server/openapi/openapi.yaml
mc_backend/openapi/openapi_server/openapi/openapi.yaml
+376
-291
mc_backend/openapi_models.yaml
mc_backend/openapi_models.yaml
+600
-0
mc_backend/requirements.txt
mc_backend/requirements.txt
+2
-0
mc_backend/tests.py
mc_backend/tests.py
+44
-38
mc_frontend/openapi/api/default.service.ts
mc_frontend/openapi/api/default.service.ts
+281
-4
mc_frontend/openapi/model/fileType.ts
mc_frontend/openapi/model/fileType.ts
+25
-0
mc_frontend/openapi/model/frequencyItem.ts
mc_frontend/openapi/model/frequencyItem.ts
+3
-2
mc_frontend/openapi/model/inlineObject.ts
mc_frontend/openapi/model/inlineObject.ts
+33
-0
mc_frontend/openapi/model/inlineResponse200.ts
mc_frontend/openapi/model/inlineResponse200.ts
+24
-0
mc_frontend/openapi/model/models.ts
mc_frontend/openapi/model/models.ts
+4
-2
mc_frontend/openapi/model/phenomenon.ts
mc_frontend/openapi/model/phenomenon.ts
+25
-0
mc_frontend/openapi/model/staticExercise.ts
mc_frontend/openapi/model/staticExercise.ts
+27
-0
mc_frontend/src/app/corpus.service.spec.ts
mc_frontend/src/app/corpus.service.spec.ts
+14
-13
mc_frontend/src/app/corpus.service.ts
mc_frontend/src/app/corpus.service.ts
+17
-17
mc_frontend/src/app/exercise-parameters/exercise-parameters.page.spec.ts
.../app/exercise-parameters/exercise-parameters.page.spec.ts
+9
-8
mc_frontend/src/app/exercise-parameters/exercise-parameters.page.ts
...d/src/app/exercise-parameters/exercise-parameters.page.ts
+3
-4
mc_frontend/src/app/models/enum.ts
mc_frontend/src/app/models/enum.ts
+2
-9
mc_frontend/src/app/models/mockMC.ts
mc_frontend/src/app/models/mockMC.ts
+3
-2
mc_frontend/src/app/models/phenomenonMap.ts
mc_frontend/src/app/models/phenomenonMap.ts
+2
-2
mc_frontend/src/app/models/queryMC.ts
mc_frontend/src/app/models/queryMC.ts
+2
-1
mc_frontend/src/app/preview/preview.page.spec.ts
mc_frontend/src/app/preview/preview.page.spec.ts
+1
-1
mc_frontend/src/app/preview/preview.page.ts
mc_frontend/src/app/preview/preview.page.ts
+12
-4
No files found.
mc_backend/csm/app/api/__init__.py
View file @
35d4aa7a
...
...
@@ -7,14 +7,13 @@ from mcserver import Config
bp
=
Blueprint
(
"api"
,
__name__
)
api
=
Api
(
bp
)
from
.
import
frequencyAPI
from
csm.app.api.annisFindAPI
import
AnnisFindAPI
from
csm.app.api.corpusStorageManagerAPI
import
CorpusStorageManagerAPI
from
csm.app.api.frequencyAPI
import
FrequencyAPI
from
csm.app.api.subgraphAPI
import
SubgraphAPI
from
csm.app.api.textcomplexityAPI
import
TextComplexityAPI
api
.
add_resource
(
AnnisFindAPI
,
Config
.
SERVER_URI_ANNIS_FIND
,
endpoint
=
"find"
)
api
.
add_resource
(
CorpusStorageManagerAPI
,
Config
.
SERVER_URI_CSM
,
endpoint
=
"csm"
)
api
.
add_resource
(
FrequencyAPI
,
Config
.
SERVER_URI_FREQUENCY
,
endpoint
=
"frequency"
)
api
.
add_resource
(
SubgraphAPI
,
Config
.
SERVER_URI_CSM_SUBGRAPH
,
endpoint
=
"subgraph"
)
api
.
add_resource
(
TextComplexityAPI
,
Config
.
SERVER_URI_TEXT_COMPLEXITY
,
endpoint
=
'textcomplexity'
)
mc_backend/csm/app/api/corpusStorageManagerAPI.py
View file @
35d4aa7a
...
...
@@ -51,7 +51,8 @@ class CorpusStorageManagerAPI(Resource):
annotations_or_urn
:
str
=
args
[
"annotations"
]
aqls
:
List
[
str
]
=
args
[
"aqls"
]
exercise_type
:
ExerciseType
=
ExerciseType
[
args
[
"exercise_type"
]]
search_phenomena
:
List
[
Phenomenon
]
=
[
Phenomenon
[
x
]
for
x
in
args
[
"search_phenomena"
]]
search_phenomena
:
List
[
Phenomenon
]
=
[
Phenomenon
().
__getattribute__
(
x
.
upper
())
for
x
in
args
[
"search_phenomena"
]]
conll
:
List
[
TokenList
]
=
CorpusService
.
get_annotations_from_string
(
annotations_or_urn
)
ret_val
:
dict
=
CorpusService
.
process_corpus_data
(
title
,
conll
,
aqls
,
exercise_type
,
search_phenomena
)
# serialize the results to json
...
...
mc_backend/csm/app/api/frequencyAPI.py
View file @
35d4aa7a
from
typing
import
List
,
Dict
,
Set
import
flask
from
flask_restful
import
Resource
from
flask_restful.reqparse
import
RequestParser
from
mcserver.app.models
import
FrequencyAnalysis
,
Phenomenon
from
mcserver.app.models
import
Phenomenon
,
FrequencyItem
from
mcserver.app.services
import
NetworkService
,
CorpusService
,
AnnotationService
class
FrequencyAPI
(
Resource
):
def
__init__
(
self
):
self
.
reqparse
:
RequestParser
=
NetworkService
.
base_request_parser
.
copy
()
self
.
reqparse
.
add_argument
(
"urn"
,
type
=
str
,
required
=
True
,
default
=
""
,
location
=
"form"
,
help
=
"No URN provided"
)
super
(
FrequencyAPI
,
self
).
__init__
()
def
get
(
self
):
""" Returns results for a frequency query from ANNIS for a given CTS URN and AQL. """
# get request arguments
args
:
dict
=
flask
.
request
.
args
urn
:
str
=
args
[
"urn"
]
fa
:
FrequencyAnalysis
=
CorpusService
.
get_frequency_analysis
(
urn
,
is_csm
=
True
)
# map the abbreviated values found by ANNIS to our own model
skip_set
:
Set
[
Phenomenon
]
=
{
Phenomenon
.
lemma
,
Phenomenon
.
dependency
}
for
fi
in
fa
:
for
i
in
range
(
len
(
fi
.
values
)):
if
fi
.
phenomena
[
i
]
in
skip_set
:
continue
value_map
:
Dict
[
str
,
List
[
str
]]
=
AnnotationService
.
phenomenon_map
[
fi
.
phenomena
[
i
]]
fi
.
values
[
i
]
=
next
((
x
for
x
in
value_map
if
fi
.
values
[
i
]
in
value_map
[
x
]),
None
)
return
NetworkService
.
make_json_response
(
fa
.
serialize
())
def
get
(
urn
:
str
):
""" Returns results for a frequency query from ANNIS for a given CTS URN and AQL. """
fa
:
List
[
FrequencyItem
]
=
CorpusService
.
get_frequency_analysis
(
urn
,
is_csm
=
True
)
# map the abbreviated values found by ANNIS to our own model
skip_set
:
Set
[
Phenomenon
]
=
{
Phenomenon
.
LEMMA
,
Phenomenon
.
DEPENDENCY
}
for
fi
in
fa
:
for
i
in
range
(
len
(
fi
.
values
)):
if
fi
.
phenomena
[
i
]
in
skip_set
:
continue
value_map
:
Dict
[
str
,
List
[
str
]]
=
AnnotationService
.
phenomenon_map
[
fi
.
phenomena
[
i
]]
fi
.
values
[
i
]
=
next
((
x
for
x
in
value_map
if
fi
.
values
[
i
]
in
value_map
[
x
]),
None
)
return
NetworkService
.
make_json_response
([
x
.
to_dict
()
for
x
in
fa
])
mc_backend/csm/csm_api.yaml
0 → 100644
View file @
35d4aa7a
openapi
:
"
3.0.0"
info
:
title
:
Machina Callida Backend REST API (Corpus Storage Manager)
version
:
"
1.0"
servers
:
-
url
:
http://localhost:6555/mc/api/v1.0
paths
:
/frequency
:
get
:
summary
:
Returns results for a frequency query from ANNIS for a given CTS URN.
operationId
:
csm.app.api.frequencyAPI.get
responses
:
200
:
description
:
Frequency analysis, i.e. a list of frequency items.
content
:
application/json
:
schema
:
type
:
array
description
:
List of items with frequency data for linguistic phenomena.
items
:
$ref
:
"
../openapi_models.yaml#/components/schemas/FrequencyItem"
parameters
:
-
name
:
urn
in
:
query
description
:
CTS URN for referencing the corpus.
required
:
true
schema
:
type
:
string
example
:
urn:cts:latinLit:phi1254.phi001.perseus-lat2:5.6.21-5.6.21
mc_backend/mcserver/app/__init__.py
View file @
35d4aa7a
...
...
@@ -3,12 +3,14 @@ import logging
import
os
import
sys
from
logging.handlers
import
RotatingFileHandler
from
pathlib
import
Path
from
threading
import
Thread
from
time
import
strftime
from
typing
import
Type
import
connexion
import
flask
import
open_alchemy
import
prance
from
connexion
import
FlaskApp
from
flask
import
Flask
,
got_request_exception
,
request
,
Response
,
send_from_directory
from
flask_cors
import
CORS
...
...
@@ -21,7 +23,7 @@ db: SQLAlchemy = SQLAlchemy() # session_options={"autocommit": True}
migrate
:
Migrate
=
Migrate
(
directory
=
Config
.
MIGRATIONS_DIRECTORY
)
if
not
hasattr
(
open_alchemy
.
models
,
Config
.
DATABASE_TABLE_CORPUS
):
# do this _BEFORE_ you add any APIs to your application
init_yaml
(
Config
.
API_SPEC_YAML_FILE_PATH
,
base
=
db
.
Model
,
init_yaml
(
Config
.
API_SPEC_
MODELS_
YAML_FILE_PATH
,
base
=
db
.
Model
,
models_filename
=
os
.
path
.
join
(
Config
.
MC_SERVER_DIRECTORY
,
"models_auto.py"
))
...
...
@@ -76,10 +78,13 @@ def full_init(app: Flask, cfg: Type[Config] = Config) -> None:
def
init_app_common
(
cfg
:
Type
[
Config
]
=
Config
,
is_csm
:
bool
=
False
)
->
Flask
:
""" Initializes common Flask parts, e.g. CORS, configuration, database, migrations and custom corpora."""
spec_dir
:
str
=
Config
.
CSM_DIRECTORY
if
is_csm
else
Config
.
MC_SERVER_DIRECTORY
connexion_app
:
FlaskApp
=
connexion
.
FlaskApp
(
__name__
,
port
=
(
cfg
.
CORPUS_STORAGE_MANAGER_PORT
if
is_csm
else
cfg
.
HOST_PORT
),
specification_dir
=
Config
.
MC_SERVER_DIRECTORY
)
connexion_app
.
add_api
(
Config
.
API_SPEC_YAML_FILE_PATH
,
arguments
=
{
'title'
:
'Machina Callida Backend REST API'
})
__name__
,
port
=
(
cfg
.
CORPUS_STORAGE_MANAGER_PORT
if
is_csm
else
cfg
.
HOST_PORT
),
specification_dir
=
spec_dir
)
spec_path
:
str
=
Config
.
API_SPEC_CSM_FILE_PATH
if
is_csm
else
Config
.
API_SPEC_MCSERVER_FILE_PATH
parser
=
prance
.
ResolvingParser
(
spec_path
,
lazy
=
True
,
strict
=
False
)
# str(Path(spec_path).absolute())
parser
.
parse
()
connexion_app
.
add_api
(
parser
.
specification
)
apply_event_handlers
(
connexion_app
)
app
:
Flask
=
connexion_app
.
app
# allow CORS requests for all API routes
...
...
mc_backend/mcserver/app/api/__init__.py
View file @
35d4aa7a
...
...
@@ -6,10 +6,7 @@ from mcserver import Config
bp
=
Blueprint
(
"api"
,
__name__
)
api
=
Api
(
bp
)
from
.
import
corpusAPI
,
corpusListAPI
,
exerciseAPI
,
staticExercisesAPI
from
mcserver.app.api.exerciseListAPI
import
ExerciseListAPI
from
mcserver.app.api.fileAPI
import
FileAPI
from
mcserver.app.api.frequencyAPI
import
FrequencyAPI
from
.
import
corpusAPI
,
corpusListAPI
,
exerciseAPI
,
exerciseListAPI
,
fileAPI
,
frequencyAPI
,
staticExercisesAPI
from
mcserver.app.api.h5pAPI
import
H5pAPI
from
mcserver.app.api.kwicAPI
import
KwicAPI
from
mcserver.app.api.rawTextAPI
import
RawTextAPI
...
...
@@ -18,9 +15,6 @@ from mcserver.app.api.validReffAPI import ValidReffAPI
from
mcserver.app.api.vectorNetworkAPI
import
VectorNetworkAPI
from
mcserver.app.api.vocabularyAPI
import
VocabularyAPI
api
.
add_resource
(
ExerciseListAPI
,
Config
.
SERVER_URI_EXERCISE_LIST
,
endpoint
=
"exerciseList"
)
api
.
add_resource
(
FileAPI
,
Config
.
SERVER_URI_FILE
,
endpoint
=
"file"
)
api
.
add_resource
(
FrequencyAPI
,
Config
.
SERVER_URI_FREQUENCY
,
endpoint
=
"frequency"
)
api
.
add_resource
(
H5pAPI
,
Config
.
SERVER_URI_H5P
,
endpoint
=
"h5p"
)
api
.
add_resource
(
KwicAPI
,
Config
.
SERVER_URI_KWIC
,
endpoint
=
"kwic"
)
api
.
add_resource
(
RawTextAPI
,
Config
.
SERVER_URI_RAW_TEXT
,
endpoint
=
"rawtext"
)
...
...
mc_backend/mcserver/app/api/exerciseAPI.py
View file @
35d4aa7a
...
...
@@ -49,7 +49,7 @@ def get_graph_data(title: str, conll_string_or_urn: str, aqls: List[str], exerci
url
:
str
=
f
"
{
Config
.
INTERNET_PROTOCOL
}{
Config
.
HOST_IP_CSM
}
:
{
Config
.
CORPUS_STORAGE_MANAGER_PORT
}
"
data
:
str
=
json
.
dumps
(
dict
(
title
=
title
,
annotations
=
conll_string_or_urn
,
aqls
=
aqls
,
exercise_type
=
exercise_type
.
name
,
search_phenomena
=
[
x
.
name
for
x
in
search_phenomena
]
))
search_phenomena
=
search_phenomena
))
response
:
requests
.
Response
=
requests
.
post
(
url
,
data
=
data
)
try
:
return
json
.
loads
(
response
.
text
)
...
...
@@ -117,7 +117,8 @@ def post(exercise_data: dict) -> Union[Response, ConnexionResponse]:
search_values_list
:
List
[
str
]
=
json
.
loads
(
exercise_data
[
"search_values"
])
aqls
:
List
[
str
]
=
AnnotationService
.
map_search_values_to_aql
(
search_values_list
=
search_values_list
,
exercise_type
=
exercise_type
)
search_phenomena
:
List
[
Phenomenon
]
=
[
Phenomenon
[
x
.
split
(
"="
)[
0
]]
for
x
in
search_values_list
]
search_phenomena
:
List
[
Phenomenon
]
=
[
Phenomenon
().
__getattribute__
(
x
.
split
(
"="
)[
0
].
upper
())
for
x
in
search_values_list
]
urn
:
str
=
exercise_data
.
get
(
"urn"
,
""
)
# if there is custom text instead of a URN, immediately annotate it
conll_string_or_urn
:
str
=
urn
if
CorpusService
.
is_urn
(
urn
)
else
AnnotationService
.
get_udpipe
(
...
...
mc_backend/mcserver/app/api/exerciseListAPI.py
View file @
35d4aa7a
...
...
@@ -3,58 +3,39 @@ from typing import List, Set
import
conllu
from
conllu
import
TokenList
from
flask_restful
import
Resource
from
flask_restful.reqparse
import
RequestParser
from
mcserver.app
import
db
from
mcserver.app.models
import
Language
,
VocabularyCorpus
,
ResourceType
from
mcserver.app.services
import
NetworkService
,
FileService
from
mcserver.models_auto
import
Exercise
,
UpdateInfo
class
ExerciseListAPI
(
Resource
):
"""The exercise list API resource. It enables some of the CRUD operations for the exercises from the database."""
def
__init__
(
self
):
"""Initialize possible arguments for calls to the exercise list REST API."""
self
.
reqparse
:
RequestParser
=
NetworkService
.
base_request_parser
.
copy
()
self
.
reqparse
.
add_argument
(
"lang"
,
type
=
str
,
required
=
True
,
help
=
"No language specified"
)
self
.
reqparse
.
add_argument
(
"last_update_time"
,
type
=
int
,
required
=
False
,
default
=
0
,
help
=
"No milliseconds time for last update provided"
)
self
.
reqparse
.
add_argument
(
"vocabulary"
,
type
=
str
,
required
=
False
,
help
=
"No reference vocabulary provided"
)
self
.
reqparse
.
add_argument
(
"frequency_upper_bound"
,
type
=
int
,
required
=
False
,
help
=
"No upper bound for reference vocabulary frequency provided"
)
super
(
ExerciseListAPI
,
self
).
__init__
()
def
get
(
self
):
"""The GET method for the exercise list REST API. It provides metadata for all available exercises."""
args
:
dict
=
self
.
reqparse
.
parse_args
()
vocabulary_set
:
Set
[
str
]
last_update
:
int
=
args
[
"last_update_time"
]
ui_exercises
:
UpdateInfo
=
db
.
session
.
query
(
UpdateInfo
).
filter_by
(
resource_type
=
ResourceType
.
exercise_list
.
name
).
first
()
db
.
session
.
commit
()
if
ui_exercises
.
last_modified_time
<
last_update
/
1000
:
return
NetworkService
.
make_json_response
([])
try
:
vc
:
VocabularyCorpus
=
VocabularyCorpus
[
args
[
"vocabulary"
]]
vocabulary_set
=
FileService
.
get_vocabulary_set
(
vc
,
args
[
"frequency_upper_bound"
])
except
KeyError
:
vocabulary_set
=
set
()
lang
:
Language
try
:
lang
=
Language
(
args
[
"lang"
])
except
ValueError
:
lang
=
Language
.
English
exercises
:
List
[
Exercise
]
=
db
.
session
.
query
(
Exercise
).
filter_by
(
language
=
lang
.
value
)
db
.
session
.
commit
()
ret_val
:
List
[
dict
]
=
[
NetworkService
.
serialize_exercise
(
x
,
compress
=
True
)
for
x
in
exercises
]
matching_degrees
:
List
[
float
]
=
[]
if
len
(
vocabulary_set
):
for
exercise
in
exercises
:
conll
:
List
[
TokenList
]
=
conllu
.
parse
(
exercise
.
conll
)
lemmata
:
List
[
str
]
=
[
tok
[
"lemma"
]
for
sent
in
conll
for
tok
in
sent
.
tokens
]
matching_degrees
.
append
(
sum
((
1
if
x
in
vocabulary_set
else
0
)
for
x
in
lemmata
)
/
len
(
lemmata
)
*
100
)
for
i
in
range
(
len
(
ret_val
)):
ret_val
[
i
][
"matching_degree"
]
=
matching_degrees
[
i
]
return
NetworkService
.
make_json_response
(
ret_val
)
def
get
(
lang
:
str
,
frequency_upper_bound
:
int
,
last_update_time
:
int
,
vocabulary
:
str
=
""
):
"""The GET method for the exercise list REST API. It provides metadata for all available exercises."""
vocabulary_set
:
Set
[
str
]
ui_exercises
:
UpdateInfo
=
db
.
session
.
query
(
UpdateInfo
).
filter_by
(
resource_type
=
ResourceType
.
exercise_list
.
name
).
first
()
db
.
session
.
commit
()
if
ui_exercises
.
last_modified_time
<
last_update_time
/
1000
:
return
NetworkService
.
make_json_response
([])
try
:
vc
:
VocabularyCorpus
=
VocabularyCorpus
[
vocabulary
]
vocabulary_set
=
FileService
.
get_vocabulary_set
(
vc
,
frequency_upper_bound
)
except
KeyError
:
vocabulary_set
=
set
()
lang
:
Language
try
:
lang
=
Language
(
lang
)
except
ValueError
:
lang
=
Language
.
English
exercises
:
List
[
Exercise
]
=
db
.
session
.
query
(
Exercise
).
filter_by
(
language
=
lang
.
value
)
db
.
session
.
commit
()
ret_val
:
List
[
dict
]
=
[
NetworkService
.
serialize_exercise
(
x
,
compress
=
True
)
for
x
in
exercises
]
matching_degrees
:
List
[
float
]
=
[]
if
len
(
vocabulary_set
):
for
exercise
in
exercises
:
conll
:
List
[
TokenList
]
=
conllu
.
parse
(
exercise
.
conll
)
lemmata
:
List
[
str
]
=
[
tok
[
"lemma"
]
for
sent
in
conll
for
tok
in
sent
.
tokens
]
matching_degrees
.
append
(
sum
((
1
if
x
in
vocabulary_set
else
0
)
for
x
in
lemmata
)
/
len
(
lemmata
)
*
100
)
for
i
in
range
(
len
(
ret_val
)):
ret_val
[
i
][
"matching_degree"
]
=
matching_degrees
[
i
]
return
NetworkService
.
make_json_response
(
ret_val
)
mc_backend/mcserver/app/api/fileAPI.py
View file @
35d4aa7a
...
...
@@ -5,8 +5,10 @@ import uuid
from
datetime
import
datetime
from
typing
import
List
,
Union
import
connexion
import
flask
from
flask
import
send_from_directory
from
connexion.lifecycle
import
ConnexionResponse
from
flask
import
send_from_directory
,
Response
from
flask_restful
import
Resource
,
abort
from
flask_restful.reqparse
import
RequestParser
from
werkzeug.wrappers
import
ETagResponseMixin
...
...
@@ -18,71 +20,6 @@ from mcserver.config import Config
from
mcserver.models_auto
import
Exercise
,
UpdateInfo
,
LearningResult
class
FileAPI
(
Resource
):
"""The file API resource. It allows users to download files that are stored as strings in the database."""
def
__init__
(
self
):
"""Initialize possible arguments for calls to the file REST API."""
self
.
reqparse
:
RequestParser
=
NetworkService
.
base_request_parser
.
copy
()
self
.
reqparse
.
add_argument
(
"id"
,
type
=
str
,
required
=
False
,
location
=
"args"
,
help
=
"No exercise ID or URN provided"
)
self
.
reqparse
.
add_argument
(
"type"
,
type
=
str
,
required
=
False
,
location
=
"args"
,
help
=
"No file type provided"
)
self
.
reqparse
.
add_argument
(
"solution_indices"
,
type
=
str
,
required
=
False
,
location
=
"args"
,
help
=
"No solution IDs provided"
)
self
.
reqparse
.
add_argument
(
"learning_result"
,
type
=
str
,
required
=
False
,
location
=
"form"
,
help
=
"No learning result provided"
)
self
.
reqparse
.
add_argument
(
"html_content"
,
type
=
str
,
required
=
False
,
location
=
"form"
,
help
=
"No HTML content provided"
)
self
.
reqparse
.
add_argument
(
"file_type"
,
type
=
str
,
required
=
False
,
location
=
"form"
,
help
=
"No file type provided"
)
self
.
reqparse
.
add_argument
(
"urn"
,
type
=
str
,
required
=
False
,
location
=
"form"
,
help
=
"No URN provided"
)
super
(
FileAPI
,
self
).
__init__
()
def
get
(
self
)
->
ETagResponseMixin
:
"""The GET method for the file REST API. It provides the URL to download a specific file."""
clean_tmp_folder
()
args
=
self
.
reqparse
.
parse_args
()
eid
:
str
=
args
[
"id"
]
exercise
:
Exercise
=
db
.
session
.
query
(
Exercise
).
filter_by
(
eid
=
eid
).
first
()
db
.
session
.
commit
()
file_type
:
FileType
=
FileType
[
args
[
"type"
]]
file_name
:
str
=
eid
+
"."
+
file_type
.
value
mime_type
:
str
=
MimeType
[
file_type
.
value
].
value
if
exercise
is
None
:
# try and see if a file is already cached on disk
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
Config
.
TMP_DIRECTORY
,
file_name
)):
abort
(
404
)
return
send_from_directory
(
Config
.
TMP_DIRECTORY
,
file_name
,
mimetype
=
mime_type
,
as_attachment
=
True
)
exercise
.
last_access_time
=
datetime
.
utcnow
().
timestamp
()
db
.
session
.
commit
()
solution_indices
:
List
[
int
]
=
json
.
loads
(
args
[
"solution_indices"
]
if
args
[
"solution_indices"
]
else
"null"
)
if
solution_indices
is
not
None
:
file_name
=
eid
+
"-"
+
str
(
uuid
.
uuid4
())
+
"."
+
file_type
.
value
existing_file
:
DownloadableFile
=
next
(
(
x
for
x
in
FileService
.
downloadable_files
if
x
.
id
+
"."
+
x
.
file_type
.
value
==
file_name
),
None
)
if
existing_file
is
None
:
existing_file
=
FileService
.
make_tmp_file_from_exercise
(
file_type
,
exercise
,
solution_indices
)
return
send_from_directory
(
Config
.
TMP_DIRECTORY
,
existing_file
.
file_name
,
mimetype
=
mime_type
,
as_attachment
=
True
)
def
post
(
self
)
->
Union
[
None
,
ETagResponseMixin
]:
""" The POST method for the File REST API.
It writes learning results or HTML content to the disk for later access. """
form_data
:
dict
=
flask
.
request
.
form
lr_string
:
str
=
form_data
.
get
(
"learning_result"
,
None
)
if
lr_string
:
lr_dict
:
dict
=
json
.
loads
(
lr_string
)
for
exercise_id
in
lr_dict
:
xapi_statement
:
XapiStatement
=
XapiStatement
(
lr_dict
[
exercise_id
])
save_learning_result
(
xapi_statement
)
else
:
file_type
:
FileType
=
FileType
[
form_data
[
"file_type"
]]
existing_file
:
DownloadableFile
=
FileService
.
make_tmp_file_from_html
(
form_data
[
"urn"
],
file_type
,
form_data
[
"html_content"
])
return
NetworkService
.
make_json_response
(
existing_file
.
file_name
)
def
clean_tmp_folder
():
""" Cleans the files directory regularly. """
ui_file
:
UpdateInfo
=
db
.
session
.
query
(
UpdateInfo
).
filter_by
(
resource_type
=
ResourceType
.
file_api_clean
.
name
).
first
()
...
...
@@ -91,7 +28,7 @@ def clean_tmp_folder():
for
file
in
[
x
for
x
in
os
.
listdir
(
Config
.
TMP_DIRECTORY
)
if
x
not
in
".gitignore"
]:
file_to_delete_type
:
str
=
os
.
path
.
splitext
(
file
)[
1
].
replace
(
"."
,
""
)
file_to_delete
:
DownloadableFile
=
next
((
x
for
x
in
FileService
.
downloadable_files
if
x
.
file_name
==
file
and
x
.
file_type
.
value
==
file_to_delete_type
),
x
.
file_name
==
file
and
x
.
file_type
==
file_to_delete_type
),
None
)
if
file_to_delete
is
not
None
:
FileService
.
downloadable_files
.
remove
(
file_to_delete
)
...
...
@@ -100,6 +37,48 @@ def clean_tmp_folder():
db
.
session
.
commit
()
def
get
(
id
:
str
,
type
:
FileType
,
solution_indices
:
List
[
int
])
->
Union
[
ETagResponseMixin
,
ConnexionResponse
]:
"""The GET method for the file REST API. It provides the URL to download a specific file."""
clean_tmp_folder
()
exercise
:
Exercise
=
db
.
session
.
query
(
Exercise
).
filter_by
(
eid
=
id
).
first
()
db
.
session
.
commit
()
file_name
:
str
=
id
+
"."
+
str
(
type
)
mime_type
:
str
=
MimeType
[
type
].
value
if
exercise
is
None
:
# try and see if a file is already cached on disk
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
Config
.
TMP_DIRECTORY
,
file_name
)):
return
connexion
.
problem
(
404
,
Config
.
ERROR_TITLE_NOT_FOUND
,
Config
.
ERROR_MESSAGE_EXERCISE_NOT_FOUND
)
return
send_from_directory
(
Config
.
TMP_DIRECTORY
,
file_name
,
mimetype
=
mime_type
,
as_attachment
=
True
)
exercise
.
last_access_time
=
datetime
.
utcnow
().
timestamp
()
db
.
session
.
commit
()
if
solution_indices
:
file_name
=
id
+
"-"
+
str
(
uuid
.
uuid4
())
+
"."
+
str
(
type
)
existing_file
:
DownloadableFile
=
next
(
(
x
for
x
in
FileService
.
downloadable_files
if
x
.
id
+
"."
+
str
(
x
.
file_type
)
==
file_name
),
None
)
if
existing_file
is
None
:
existing_file
=
FileService
.
make_tmp_file_from_exercise
(
type
,
exercise
,
solution_indices
)
return
send_from_directory
(
Config
.
TMP_DIRECTORY
,
existing_file
.
file_name
,
mimetype
=
mime_type
,
as_attachment
=
True
)
def
post
(
file_data
:
dict
)
->
Response
:
""" The POST method for the File REST API.
It writes learning results or HTML content to the disk for later access. """
lr_string
:
str
=
file_data
.
get
(
"learning_result"
,
None
)
if
lr_string
:
lr_dict
:
dict
=
json
.
loads
(
lr_string
)
for
exercise_id
in
lr_dict
:
xapi_statement
:
XapiStatement
=
XapiStatement
(
lr_dict
[
exercise_id
])
save_learning_result
(
xapi_statement
)
return
NetworkService
.
make_json_response
(
str
(
True
))
else
:
file_type
:
FileType
=
file_data
[
"file_type"
]
existing_file
:
DownloadableFile
=
FileService
.
make_tmp_file_from_html
(
file_data
[
"urn"
],
file_type
,
file_data
[
"html_content"
])
return
NetworkService
.
make_json_response
(
existing_file
.
file_name
)
def
save_learning_result
(
xapi_statement
:
XapiStatement
)
->
LearningResult
:
"""Creates a new Learning Result from a XAPI Statement and saves it to the database."""
learning_result
:
LearningResult
=
LearningResultMC
.
from_dict
(
...
...
mc_backend/mcserver/app/api/frequencyAPI.py
View file @
35d4aa7a
import
flask
import
requests
from
flask_restful
import
Resource
import
rapidjson
as
json
from
flask_restful.reqparse
import
RequestParser
from
mcserver
import
Config
from
mcserver.app.services
import
NetworkService
class
FrequencyAPI
(
Resource
):
def
__init__
(
self
):
# TODO: FIX THE REQUEST PARSING FOR ALL APIs
self
.
reqparse
:
RequestParser
=
NetworkService
.
base_request_parser
.
copy
()
self
.
reqparse
.
add_argument
(
"urn"
,
type
=
str
,
required
=
True
,
default
=
""
,
location
=
"form"
,
help
=
"No URN provided"
)
super
(
FrequencyAPI
,
self
).
__init__
()
def
get
(
self
):
""" Returns results for a frequency query from ANNIS for a given CTS URN and AQL. """
# get request arguments
args
:
dict
=
flask
.
request
.
args
urn
:
str
=
args
[
"urn"
]
url
:
str
=
f
"
{
Config
.
INTERNET_PROTOCOL
}{
Config
.
HOST_IP_CSM
}
:
{
Config
.
CORPUS_STORAGE_MANAGER_PORT
}
"
+
\
Config
.
SERVER_URI_FREQUENCY
response
:
requests
.
Response
=
requests
.
get
(
url
,
params
=
dict
(
urn
=
urn
))
return
NetworkService
.
make_json_response
(
json
.
loads
(
response
.
text
))
def
get
(
urn
:
str
):
""" Returns results for a frequency query from ANNIS for a given CTS URN and AQL. """
url
:
str
=
f
"
{
Config
.
INTERNET_PROTOCOL
}{
Config
.
HOST_IP_CSM
}
:
{
Config
.
CORPUS_STORAGE_MANAGER_PORT
}
"
+
\
Config
.
SERVER_URI_FREQUENCY
response
:
requests
.
Response
=
requests
.
get
(
url
,
params
=
dict
(
urn
=
urn
))
return
NetworkService
.
make_json_response
(
json
.
loads
(
response
.
text
))
mc_backend/mcserver/app/api/staticExercisesAPI.py
View file @
35d4aa7a
...
...
@@ -27,7 +27,8 @@ def get() -> Union[Response, ConnexionResponse]:
if
datetime
.
fromtimestamp
(
time
()
-
Config
.
INTERVAL_STATIC_EXERCISES
)
>
NetworkService
.
exercises_last_update
\
or
len
(
NetworkService
.
exercises
)
==
0
:
return
update_exercises
()
return
NetworkService
.
make_json_response
({
k
:
v
.
__dict__
for
(
k
,
v
)
in
NetworkService
.
exercises
.
items
()})
return
NetworkService
.
make_json_response
(
{
x
:
NetworkService
.
exercises
[
x
].
to_dict
()
for
x
in
NetworkService
.
exercises
})
def
get_relevant_strings
(
response
:
Response
):
...
...
@@ -136,11 +137,13 @@ def update_exercises() -> Union[Response, ConnexionResponse]:
search_results_dict
:
Dict
[
str
,
int
]
=
{
item
[
0
]:
i
for
(
i
,
item
)
in
enumerate
(
search_results
)}
for
url
in
relevant_strings_dict
:
# the URN points to Cicero's letters to his brother Quintus, 1.1.8-1.1.10
NetworkService
.
exercises
[
url
]
=
StaticExercise
(
urn
=
"urn:cts:latinLit:phi0474.phi058.perseus-lat1:1.1.8-1.1.10"
)
NetworkService
.
exercises
[
url
]
=
StaticExercise
(
solutions
=
[],
urn
=
"urn:cts:latinLit:phi0474.phi058.perseus-lat1:1.1.8-1.1.10"
)
for
word
in
relevant_strings_dict
[
url
]:
# UDpipe cannot handle name abbreviations, so remove the punctuation and only keep the upper case letter
if
word
[
-
1
]
in
string
.
punctuation
:
word
=
word
[:
-
1
]
NetworkService
.
exercises
[
url
].
solutions
.
append
(
list
(
search_results
[
search_results_dict
[
word
]]))
NetworkService
.
exercises_last_update
=
datetime
.
fromtimestamp
(
time
())
return
NetworkService
.
make_json_response
({
k
:
v
.
__dict__
for
(
k
,
v
)
in
NetworkService
.
exercises
.
items
()})
return
NetworkService
.
make_json_response
(
{
x
:
NetworkService
.
exercises
[
x
].
to_dict
()
for
x
in
NetworkService
.
exercises
})
mc_backend/mcserver/app/models.py
View file @
35d4aa7a
"""Models for dealing with text data, both in the database and in the application itself."""
from
typing
import
Dict
,
List
,
Union
,
Any
from
typing
import
Dict
,
List
from
enum
import
Enum
import
typing
from
mcserver.config
import
Config
from
mcserver.models_auto
import
TExercise
,
Corpus
,
TCorpus
,
Exercise
,
TLearningResult
,
LearningResult
from
openapi.openapi_server.models
import
SolutionElement
,
Solution
,
Link
,
NodeMC
,
TextComplexity
,
AnnisResponse
,
\
GraphData
GraphData
,
StaticExercise
,
FileType
,
FrequencyItem
,
Phenomenon
AnnisResponse
=
AnnisResponse
FileType
=
FileType
FrequencyItem
=
FrequencyItem
GraphData
=
GraphData
LinkMC
=
Link
NodeMC
=
NodeMC
Phenomenon
=
Phenomenon
SolutionElement
=
SolutionElement
StaticExercise
=
StaticExercise
TextComplexity
=
TextComplexity
...
...
@@ -74,7 +78,8 @@ class Dependency(Enum):
punctuation
=
26
root
=
27
subject
=
28
vocative
=
29
unspecified
=
29
vocative
=
30
class
ExerciseType
(
Enum
):
...
...
@@ -84,11 +89,8 @@ class ExerciseType(Enum):
matching
=
"matching"
class
FileType
(
Enum
):
docx
=
"docx"
json
=
"json"
pdf
=
"pdf"
xml
=
"xml"
class
Feats
(
Enum
):
Case
=
"case"
class
Language
(
Enum
):
...
...
@@ -129,13 +131,6 @@ class PartOfSpeech(Enum):
verb
=
15
class
Phenomenon
(
Enum
):
case
=
"feats"
dependency
=
"dependency"
lemma
=
"lemma"
partOfSpeech
=
"upostag"
class
ResourceType
(
Enum
):
"""Resource types for the UpdateInfo table in the database.
...
...
@@ -466,35 +461,3 @@ class Sentence:
def
__init__
(
self
,
id
:
int
,
matching_degree
:
int
):
self
.
id
=
id
self
.
matching_degree
=
matching_degree
class
StaticExercise
:
def
__init__
(
self
,
solutions
:
List
[
List
[
str
]]
=
None
,
urn
:
str
=
""
):
self
.
solutions
=
[]
if
solutions
is
None
else
solutions
self
.
urn
=
urn
class
FrequencyItem
:
def
__init__
(
self
,
values
:
List
[
str
],
phenomena
:
List
[
Phenomenon
],
count
:
Union
[
int
,
Any
]):
self
.
values
=
values
self
.
phenomena
=
phenomena
self
.
count
=
count
def
serialize
(
self
)
->
dict
: