Compare commits

..

10 Commits

Author SHA1 Message Date
sam cd81b7514a Tracked mca_querying.py. 2026-05-26 17:33:25 +01:00
sam c7bb1608a1 Progress towards sensible-enough numpy based querying using shared memory views rather than duplicated data. 2026-05-26 17:32:56 +01:00
sam b9ffbdee89 Created mca_stubs.py which provides record entry details (and slice objects) for the .MCA types. 2026-05-26 11:06:28 +01:00
sam e887cc791e Removal of sqlite/sqlalchemy based approach - it is too slow to combine the results, even with in-memory database loading. 2026-05-26 10:17:28 +01:00
sam 0479f1e4a8 Improved a few things, querying for multiple services now runs at a tolerable speed. Would prefer if it could be improved further, will look at pre-merging tables using sql rather than pandas. 2026-05-25 21:21:53 +01:00
sam e723109a0a Begun creating some utility functions and noticed some limitations. Fetching one schedule at a time is too slow, and we could easily split an aggregated result. 2026-05-25 17:46:13 +01:00
sam 36aa23f464 Various minor updates, basic Schedule class. Added a SixData class to manage conversions of YYMMDD to/from more pythonic objects. 2026-05-25 14:06:35 +01:00
sam c2633952d3 Added mca_queries.py and its pre-generated result mca_record_types.py. The latter is for type hinting and will make writing queries to solve for schedule numbers much easier. Next will be to write tools to make hunting for desired schedules easier. 2026-05-25 13:26:11 +01:00
sam 51c4f5030c Updated the raw_mca_... table generation to include line number from the file, and schedule number - although we may need to investigate how the last entry behaves with 'ZZ' records and any others. We don't want to inherit the technical debt of remembering this one case every time. 2026-05-23 10:38:03 +01:00
sam f35cda6f10 Finished parsing.py initial implementation, now have a sqlite database generating >600MB of timetable records. Next will be generating sqlalchemy descriptors based on the automated specifications. If I can re-learn sqlalchemy that is. 2026-05-22 16:57:14 +01:00
6 changed files with 859 additions and 69 deletions
+3 -1
View File
@@ -10,7 +10,9 @@ dependencies = [
"requests (>=2.34.2,<3.0.0)",
"pypdf (>=6.12.0,<7.0.0)",
"pandas (>=3.0.3,<4.0.0)",
"pandas-stubs (>=3.0.0.260204,<4.0.0.0)"
"pandas-stubs (>=3.0.0.260204,<4.0.0.0)",
"sqlalchemy (>=2.0.49,<3.0.0)",
"py-spy (>=0.4.2,<0.5.0)"
]
+1 -8
View File
@@ -1,10 +1,3 @@
from national_rail_timetable.nr_requests import fetch_nr_token, fetch_nr_timetable_files
from national_rail_timetable.parsing import (
extract_specification_document_tables,
store_specification_table_raws,
read_specification_table_raws,
create_mca_specification_dbschema,
main,
)
from national_rail_timetable.mca_querying import main
print(main())
+191
View File
@@ -0,0 +1,191 @@
"""
MCA file querying to retrieve schedule(d service)s.
"""
# pyright: reportAny=false
# pyright: reportUnknownVariableType=false
# pyright: reportUnknownArgumentType=false
# pyright: reportAttributeAccessIssue=false
# pyright: reportOperatorIssue=false
# Imports
from dataclasses import dataclass
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Callable, Self
from zipfile import ZipFile
import numpy as np
from numpy.typing import NDArray
from national_rail_timetable.nr_requests import fetch_nr_timetable_files
from national_rail_timetable.mca_stubs import BS, BX, LI, LO, LT, CR
# Functions
def ux(s1: NDArray[np.byte]) -> NDArray[np.str_]:
    """Join each row of single-byte characters into one unicode string.

    Args:
        s1: 2-D array of ``S1`` items; every row is one fixed-width text
            field (or a whole record).

    Returns:
        1-D array with one ``U<width>`` string per row of ``s1``.
    """
    width = s1.shape[1]
    # ``view`` reinterprets the bytes on a new array object. Assigning to
    # ``s1.dtype`` instead would reshape the caller's array to (rows, 1) and
    # change its dtype as a side effect.
    return s1.view(f"S{width}").astype(f"U{width}")[:, 0]
def sx(s1: NDArray[np.byte]) -> NDArray[np.byte]:
    """Join each row of single-byte characters into one byte string.

    Args:
        s1: 2-D array of ``S1`` items; every row is one fixed-width text
            field (or a whole record).

    Returns:
        1-D array with one ``S<width>`` byte string per row of ``s1``. The
        result shares memory with ``s1`` (no bytes are copied).
    """
    width = s1.shape[1]
    # ``view`` reinterprets the bytes on a new array object. Assigning to
    # ``s1.dtype`` instead would reshape the caller's array to (rows, 1) and
    # change its dtype as a side effect.
    return s1.view(f"S{width}")[:, 0]
# Classes
@dataclass
class Timetable:
array: NDArray[np.byte]
@classmethod
def from_zipfile(
cls,
zipfile: ZipFile | None = None,
) -> Self:
zipfile = zipfile if zipfile is not None else fetch_nr_timetable_files()
with TemporaryDirectory() as tempdir:
name = zipfile.extract(
[
zipinfo
for name, zipinfo in zipfile.NameToInfo.items()
if name.split(".")[-1] == "MCA"
][0],
path=tempdir,
)
array: NDArray[np.byte] = np.fromfile(
Path(tempdir) / name, dtype="S1"
).reshape((-1, 82))[:, :-2]
return cls(array=array)
def bs_mask(self) -> NDArray[np.bool]:
return (self.array[:, 0] == b"B") & (self.array[:, 1] == b"S")
def lo_mask(self) -> NDArray[np.bool]:
return (self.array[:, 0] == b"L") & (self.array[:, 1] == b"O")
@property
def sns(self) -> NDArray[np.integer]:
return np.repeat(
(
sns := [
0,
*np.arange(self.array.shape[0])[self.bs_mask()],
self.array.shape[0],
]
)[:-1],
np.diff(sns),
)
def fetch_schedules(self, *sns: int) -> NDArray[np.byte]:
return self.array[np.isin(self.sns, sns)]
@dataclass
class Query:
    """Chainable filter over the schedules of a ``Timetable``.

    ``sns`` holds the schedule numbers selected so far (values taken from
    ``Timetable.sns``), or ``None`` when no filter has been applied yet.
    Every filter method returns a new query and leaves ``self`` untouched.
    """

    tt: Timetable
    sns: NDArray[np.integer] | None = None

    def _field(self, field: slice) -> NDArray[np.byte]:
        """Return one fixed-width field of every row as a 1-D byte-string array."""
        return sx(self.tt.array[:, field])

    def _location_mask(
        self,
        identity: bytes,
        location: slice,
        tiploc: str,
        call_number: str | int | None = None,
    ) -> NDArray[np.bool]:
        """Mask rows of record type ``identity`` located at ``tiploc``.

        ``location`` covers TIPLOC + suffix; the suffix is its last character,
        so the TIPLOC itself is everything but the final column.
        """
        tiploc_field = slice(location.start, location.stop - 1)
        mask: NDArray[np.bool] = (
            self._field(slice(0, 2)) == identity
        ) & (self._field(tiploc_field) == f"{tiploc:<7}".encode())
        if call_number is not None:
            # Single suffix column, compared directly: it is 1-D, so it cannot
            # go through sx(), and it sits at stop - 1 (stop is one past it).
            suffix = self.tt.array[:, location.stop - 1]
            mask &= suffix == str(call_number).encode()
        return mask

    def _query_from_mask(self, mask: NDArray[np.bool]) -> Self:
        """Narrow the current selection to the schedules owning masked rows."""
        sns: NDArray[np.integer] = self.tt.sns[mask]
        if self.sns is not None:
            sns = np.intersect1d(sns, self.sns)
        return type(self)(self.tt, sns)

    def on_date(self, date: str) -> Self:
        """Keep schedules whose BS date range contains ``date`` (``yymmdd``).

        Dates are compared as byte strings, so both bounds are inclusive.
        """
        bs = BS()
        day = date.encode()
        mask: NDArray[np.bool] = (
            (self._field(bs.record_identity()) == b"BS")
            & (self._field(bs.date_runs_from()) <= day)
            & (self._field(bs.date_runs_to()) >= day)
        )
        return self._query_from_mask(mask)

    def origin(self, tiploc: str) -> Self:
        """Keep schedules whose LO (origin) record is at ``tiploc``."""
        return self._query_from_mask(
            self._location_mask(b"LO", LO().location(), tiploc)
        )

    def dest(self, tiploc: str, call_number: str | int | None = None) -> Self:
        """Keep schedules whose LT (terminating) record is at ``tiploc``.

        Args:
            tiploc: TIPLOC code, padded to 7 characters before comparison.
            call_number: When given, the location suffix character must equal
                ``str(call_number)``.
        """
        return self._query_from_mask(
            self._location_mask(b"LT", LT().location(), tiploc, call_number)
        )

    def calls(self, tiploc: str, call_number: str | int | None = None) -> Self:
        """Keep schedules that call at, start from or terminate at ``tiploc``.

        Args:
            tiploc: TIPLOC code, padded to 7 characters before comparison.
            call_number: When given, the suffix character of the LI record
                must equal ``str(call_number)``.
        """
        intermediate = self._query_from_mask(
            self._location_mask(b"LI", LI().location(), tiploc, call_number)
        )
        # NOTE(review): call_number only constrains the LI match; origin and
        # destination matches are unioned in regardless - confirm intended.
        return intermediate | self.origin(tiploc) | self.dest(tiploc)

    def get_field(self, mca_field: Callable[..., slice]) -> NDArray[np.byte]:
        """Extract one field from the matching records of the result.

        Args:
            mca_field: Bound method of a stub record class, e.g.
                ``BX().retail_service_id``. The class name selects the record
                type and the returned slice selects the columns.

        Returns:
            1-D byte-string array with one entry per record of that type.
        """
        # "BX.retail_service_id" -> "BX"; avoids parsing the method's repr.
        record_type = mca_field.__qualname__.split(".")[0]
        rows = self.result  # evaluate the (expensive) fetch only once
        return sx(rows[sx(rows[:, :2]) == record_type.encode(), mca_field()])

    @property
    def _a(self):
        """Shortcut to the underlying timetable array."""
        return self.tt.array

    @property
    def result(self) -> NDArray[np.byte]:
        """All rows of the selected schedules; requires a filter to be applied."""
        assert self.sns is not None
        return self.tt.fetch_schedules(*self.sns)

    def as_sx(self) -> NDArray[np.byte]:
        """Return the result rows as one byte string per record."""
        return sx(self.result)

    def as_ux(self) -> NDArray[np.str_]:
        """Return the result rows as one unicode string per record."""
        return ux(self.result)

    def __and__(self, other: Self) -> Self:
        """Schedules selected by both queries (same timetable required)."""
        assert self.tt is other.tt
        assert self.sns is not None
        assert other.sns is not None
        return type(self)(self.tt, np.intersect1d(self.sns, other.sns))

    def __or__(self, other: Self) -> Self:
        """Schedules selected by either query (same timetable required)."""
        assert self.tt is other.tt
        assert self.sns is not None
        assert other.sns is not None
        return type(self)(self.tt, np.union1d(self.sns, other.sns))
# Script
def main():
    """Run a demo query, loading the timetable from the local cache if present.

    The parsed array is cached as ``data/cache.mca.npy`` two directories above
    this file; on a cache miss the timetable is fetched and the cache written.
    Prints the matching records and their retail service ids.
    """
    cache_path = Path(__file__).parents[2] / "data/cache.mca.npy"
    try:
        tt = Timetable(np.load(cache_path))
    except FileNotFoundError:
        tt = Timetable.from_zipfile()
        # Without the directory np.save would raise FileNotFoundError and
        # throw away the timetable that was just downloaded.
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        np.save(cache_path, tt.array)
    result = Query(tt).on_date("260526").calls("CRMLNGT")
    print(result.as_ux())
    print(result.get_field(BX().retail_service_id))
# Run the demo query only when this module is executed as a script.
if __name__ == "__main__":
    main()
+622
View File
@@ -0,0 +1,622 @@
# This file is generated by parsing.generate_mca_stubs.
# Do not modify by hand.
# Imports
from dataclasses import dataclass
# Classes
@dataclass
class BS:
    """Field positions of the BS record type of an .MCA file.

    Each method returns the 0-based, end-exclusive slice of one field within
    an 80-character record.
    """

    def record_identity(self):
        """With the constant value BS."""
        return slice(0, 2)

    def transaction_type(self):
        """N = New. D = Delete. R = Revise."""
        return slice(2, 3)

    def train_uid(self):
        """Unique train Identifier."""
        return slice(3, 9)

    def date_runs_from(self):
        """yymmdd"""
        return slice(9, 15)

    def date_runs_to(self):
        """yymmdd"""
        return slice(15, 21)

    def days_run(self):
        """No description."""
        return slice(21, 28)

    def bank_holiday_running(self):
        """No description."""
        return slice(28, 29)

    def train_status(self):
        """No description."""
        return slice(29, 30)

    def train_category(self):
        """No description."""
        return slice(30, 32)

    def train_identity(self):
        """No description."""
        return slice(32, 36)

    def headcode(self):
        """No description."""
        return slice(36, 40)

    def course_indicator(self):
        """Not used - always set to 1."""
        return slice(40, 41)

    def profit_centre_code(self):
        """No description."""
        return slice(41, 49)

    def business_sector(self):
        """Now used to contain the portion suffix for RSID"""
        return slice(49, 50)

    def power_type(self):
        """No description."""
        return slice(50, 53)

    def timing_load(self):
        """No description."""
        return slice(53, 57)

    def speed(self):
        """No description."""
        return slice(57, 60)

    def operating_chars(self):
        """No description."""
        return slice(60, 66)

    def train_class(self):
        """No description."""
        return slice(66, 67)

    def sleepers(self):
        """No description."""
        return slice(67, 68)

    def reservations(self):
        """Permitted values are: A Seat Reservations Compulsory (R symbol in white box) E Reservations for Bicycles Essential (Inverted black triangle) R Seat Reservations Recommended (R symbol in black box) S Seat Reservations possible from any station (white diamond symbol)"""
        return slice(68, 69)

    def connect_indicator(self):
        """Not used - always set to blank."""
        return slice(69, 70)

    def catering_code(self):
        """No description."""
        return slice(70, 74)

    def service_branding(self):
        """No description."""
        return slice(74, 78)

    def spare(self):
        """No description."""
        return slice(78, 79)

    def stp_indicator(self):
        """C = STP cancellation of permanent schedule. N = New STP schedule. O = STP overlay of permanent schedule. P = Permanent. Read in association with the Transaction Type in Field 2"""
        return slice(79, 80)
@dataclass
class HD:
    """Field positions of the HD record type of an .MCA file.

    Each method returns the 0-based, end-exclusive slice of one field within
    an 80-character record.
    """

    def record_identity(self):
        """With the constant value HD."""
        return slice(0, 2)

    def file_identity(self):
        """No description."""
        return slice(2, 22)

    def date_of_extract(self):
        """Format ddmmyy defining the date that the BTD extract file was created."""
        return slice(22, 28)

    def time_of_extract(self):
        """hhmm defining the time that the BTD extract file was created."""
        return slice(28, 32)

    def current_file_reference(self):
        """Unique file reference."""
        return slice(32, 39)

    def last_file_reference(self):
        """Unique file reference."""
        return slice(39, 46)

    def update_indicator(self):
        """U=Update. F=Full extract."""
        return slice(46, 47)

    def version(self):
        """Version identifier of CIF software."""
        return slice(47, 48)

    def extract_start_date(self):
        """Same as Field 3 above."""
        return slice(48, 54)

    def extract_end_date(self):
        """No description."""
        return slice(54, 60)

    def spare(self):
        """No description."""
        return slice(60, 80)
@dataclass
class ZZ:
    """Field positions of the ZZ record type of an .MCA file.

    Each method returns the 0-based, end-exclusive slice of one field within
    an 80-character record.
    """

    def record_identity(self):
        """With the constant value ZZ."""
        return slice(0, 2)

    def spare(self):
        """No description."""
        return slice(2, 80)
@dataclass
class TA:
    """Field positions of the TA record type of an .MCA file.

    Each method returns the 0-based, end-exclusive slice of one field within
    an 80-character record.
    """

    def record_identity(self):
        """With the constant value TA."""
        return slice(0, 2)

    def tiploc_code(self):
        """A TIPLOC is 4-7 characters. If less than 7 then it will be padded by blanks."""
        return slice(2, 9)

    def capitals(self):
        """Defines capitalisation of TIPLOC. Can be ignored for retailing/journey planners."""
        return slice(9, 11)

    def national_location_code(self):
        """No description."""
        return slice(11, 17)

    def nlc_check_character(self):
        """No description."""
        return slice(17, 18)

    def tps_description(self):
        """No description."""
        return slice(18, 44)

    def stanox(self):
        """TOPS location code."""
        return slice(44, 49)

    def po_mcp_code(self):
        """Post Office Location Code. (Not used but may contain historic data or three blank spaces followed by 0)."""
        return slice(49, 53)

    def crs_code(self):
        """No description."""
        return slice(53, 56)

    def description(self):
        """Description used in LENNON."""
        return slice(56, 72)

    def new_tiploc(self):
        """Only present if TIPLOC change."""
        return slice(72, 79)

    def spare(self):
        """No description."""
        return slice(79, 80)
@dataclass
class CR:
    """Field positions of the CR record type of an .MCA file.

    Each method returns the 0-based, end-exclusive slice of one field within
    an 80-character record.
    """

    def record_identity(self):
        """With the constant value CR."""
        return slice(0, 2)

    def location(self):
        """TIPLOC + Suffix. Suffix is always the eighth character."""
        return slice(2, 10)

    def train_category(self):
        """No description."""
        return slice(10, 12)

    def train_identity(self):
        """No description."""
        return slice(12, 16)

    def headcode(self):
        """No description."""
        return slice(16, 20)

    def course_indicator(self):
        """No description."""
        return slice(20, 21)

    def profit_centre_code(self):
        """No description."""
        return slice(21, 29)

    def business_sector(self):
        """No description."""
        return slice(29, 30)

    def power_type(self):
        """No description."""
        return slice(30, 33)

    def timing_load(self):
        """No description."""
        return slice(33, 37)

    def speed(self):
        """No description."""
        return slice(37, 40)

    def operating_chars(self):
        """No description."""
        return slice(40, 46)

    def train_class(self):
        """No description."""
        return slice(46, 47)

    def sleepers(self):
        """No description."""
        return slice(47, 48)

    def reservations(self):
        """No description."""
        return slice(48, 49)

    def connect_indicator(self):
        """No description."""
        return slice(49, 50)

    def catering_code(self):
        """No description."""
        return slice(50, 54)

    def service_branding(self):
        """No description."""
        return slice(54, 58)

    def traction_class(self):
        """No description."""
        return slice(58, 62)

    def uic_code(self):
        """Only populated for trains travelling to/from Europe via the Channel Tunnel, otherwise blank."""
        return slice(62, 67)

    def retail_service_id(self):
        """No description."""
        return slice(67, 75)

    def spare(self):
        """No description."""
        return slice(75, 80)
@dataclass
class LT:
    """Field positions of the LT record type of an .MCA file.

    Each method returns the 0-based, end-exclusive slice of one field within
    an 80-character record.
    """

    def record_identity(self):
        """With the constant value LT."""
        return slice(0, 2)

    def location(self):
        """TIPLOC +Suffix. Suffix is always the eighth character."""
        return slice(2, 10)

    def scheduled_arrival_time(self):
        """No description."""
        return slice(10, 15)

    def public_arrival_time(self):
        """If there is no Public Arrival time this field will default to 0000."""
        return slice(15, 19)

    def platform(self):
        """No description."""
        return slice(19, 22)

    def path(self):
        """No description."""
        return slice(22, 25)

    def activity(self):
        """Up to 6 activity codes may be present. The first 2 characters will always be TF (train finishes). If there are no other activity codes, this defaults to being an advertised arrival."""
        return slice(25, 37)

    def spare(self):
        """No description."""
        return slice(37, 80)
@dataclass
class LI:
    """Field positions of the LI record type of an .MCA file.

    Each method returns the 0-based, end-exclusive slice of one field within
    an 80-character record.
    """

    def record_identity(self):
        """With the constant value LI."""
        return slice(0, 2)

    def location(self):
        """TIPLOC + Suffix. Suffix is always the eighth character."""
        return slice(2, 10)

    def scheduled_arrival_time(self):
        """No description."""
        return slice(10, 15)

    def scheduled_departure_time(self):
        """No description."""
        return slice(15, 20)

    def scheduled_pass(self):
        """No description."""
        return slice(20, 25)

    def public_arrival(self):
        """If there is no Public Arrival time this field will default to 0000."""
        return slice(25, 29)

    def public_departure(self):
        """If there is no Public Departure time this field will default to 0000."""
        return slice(29, 33)

    def platform(self):
        """No description."""
        return slice(33, 36)

    def line(self):
        """No description."""
        return slice(36, 39)

    def path(self):
        """No description."""
        return slice(39, 42)

    def activity(self):
        """Up to 6 activity codes may be present."""
        return slice(42, 54)

    def engineering_allowance(self):
        """No description."""
        return slice(54, 56)

    def pathing_allowance(self):
        """No description."""
        return slice(56, 58)

    def performance_allowance(self):
        """No description."""
        return slice(58, 60)

    def spare(self):
        """No description."""
        return slice(60, 80)
@dataclass
class TD:
    """Field positions of the TD record type of an .MCA file.

    Each method returns the 0-based, end-exclusive slice of one field within
    an 80-character record.
    """

    def record_identity(self):
        """With the constant value TD."""
        return slice(0, 2)

    def tiploc_code(self):
        """No description."""
        return slice(2, 9)

    def spare(self):
        """No description."""
        return slice(9, 80)
@dataclass
class AA:
    """Field positions of the AA record type of an .MCA file.

    Each method returns the 0-based, end-exclusive slice of one field within
    an 80-character record.
    """

    def record_identity(self):
        """With the constant value AA."""
        return slice(0, 2)

    def transaction_type(self):
        """N = New. D = Delete. R = Revise."""
        return slice(2, 3)

    def base_uid(self):
        """One of the trains involved in the association. This will always be the through train, not the splitting/joining portion."""
        return slice(3, 9)

    def assoc_uid(self):
        """The other train involved."""
        return slice(9, 15)

    def assoc_start_date(self):
        """Format: yymmdd. May not be the same as the dates of the train schedules."""
        return slice(15, 21)

    def assoc_end_date(self):
        """Format: yymmdd. May not be the same as the dates of the train schedules."""
        return slice(21, 27)

    def assoc_days(self):
        """No description."""
        return slice(27, 34)

    def assoc_cat(self):
        """The ASSOC-CAT for the base UID (first byte), followed by the ASSOC-CAT for the assoc. UID (second byte). Note: Although this field isnt specified as having blanks in the Network Rail CIF specification, if blanks are supplied they will be carried forward. (Blanks are used to override the permanent value in overlays and cancellations). JJ for Joining trains and VV for Dividing trains. NP for Next/Previous Associations may also be displayed but as this is an Operating association it should be ignored by journey planners."""
        return slice(34, 36)

    def assoc_date_ind(self):
        """S = Standard. N = Over-next-midnight. P = Over-previous-midnight. Note: Although this field isnt specified as having blanks in the Network Rail CIF specification, if blanks are supplied they will be carried forward. (Blanks are used to override the permanent value in overlays and cancellations)."""
        return slice(36, 37)

    def assoc_location(self):
        """TIPLOC where association occurs."""
        return slice(37, 44)

    def base_location_suffix(self):
        """Values are space or 2."""
        return slice(44, 45)

    def assoc_location_suffix(self):
        """Values are space or 2."""
        return slice(45, 46)

    def diagram_type(self):
        """With the constant value T."""
        return slice(46, 47)

    def association_type(self):
        """P = Passenger use. O = Operating use. Note: Although this field isnt specified as having blanks in the Network Rail CIF specification, if blanks are supplied they will be carried forward. (If blank then association defaults to Operating and should be ignored by journey planners)."""
        return slice(47, 48)

    def filler(self):
        """No description."""
        return slice(48, 79)

    def stp_indicator(self):
        """Read in conjunction with the Transaction Type in Field 2. C = STP cancellation of permanent schedule. N = New STP schedule. O = STP overlay of permanent schedule. P = Permanent."""
        return slice(79, 80)
@dataclass
class LO:
    """Field positions of the LO record type of an .MCA file.

    Each method returns the 0-based, end-exclusive slice of one field within
    an 80-character record.
    """

    def record_identity(self):
        """With the constant value LO."""
        return slice(0, 2)

    def location(self):
        """TIPLOC + Suffix. Suffix is always the eighth character."""
        return slice(2, 10)

    def scheduled_departure_time(self):
        """No description."""
        return slice(10, 15)

    def public_departure_time(self):
        """If there is no Public Departure time this field will default to 0000."""
        return slice(15, 19)

    def platform(self):
        """No description."""
        return slice(19, 22)

    def line(self):
        """No description."""
        return slice(22, 25)

    def engineering_allowance(self):
        """No description."""
        return slice(25, 27)

    def pathing_allowance(self):
        """No description."""
        return slice(27, 29)

    def activity(self):
        """Up to 6 activity codes may be present. The first 2 characters will always be TB (train begins). If there are no other activity codes, this defaults to being an advertised departure."""
        return slice(29, 41)

    def performance_allowance(self):
        """No description."""
        return slice(41, 43)

    def spare(self):
        """No description."""
        return slice(43, 80)
@dataclass
class BX:
    """Field positions of the BX record type of an .MCA file.

    Each method returns the 0-based, end-exclusive slice of one field within
    an 80-character record.
    """

    def record_identity(self):
        """With the constant value BX."""
        return slice(0, 2)

    def traction_class(self):
        """Not used always blank."""
        return slice(2, 6)

    def uic_code(self):
        """Only populated for trains travelling to/from Europe via the Channel Tunnel, otherwise blank."""
        return slice(6, 11)

    def atoc_code(self):
        """No description."""
        return slice(11, 13)

    def applicable_timetable_code(self):
        """Always set to Y."""
        return slice(13, 14)

    def retail_service_id(self):
        """No description."""
        return slice(14, 22)

    def source(self):
        """Not used always blank."""
        return slice(22, 23)

    def spare(self):
        """No description."""
        return slice(23, 80)
@dataclass
class TI:
    """Field positions of the TI record type of an .MCA file.

    Each method returns the 0-based, end-exclusive slice of one field within
    an 80-character record.
    """

    def record_identity(self):
        """With the constant value TI."""
        return slice(0, 2)

    def tiploc_code(self):
        """A TIPLOC is 4-7 characters. If less than 7 then it will be padded by blanks."""
        return slice(2, 9)

    def capitals(self):
        """Defines capitalisation of TIPLOC. Can be ignored for retailing/journey planners."""
        return slice(9, 11)

    def national_location_code(self):
        """No description."""
        return slice(11, 17)

    def nlc_check_character(self):
        """No description."""
        return slice(17, 18)

    def tps_description(self):
        """No description."""
        return slice(18, 44)

    def stanox(self):
        """TOPS location code."""
        return slice(44, 49)

    def po_mcp_code(self):
        """Post Office Location Code. (Not used but may contain historic data or three blank spaces followed by 0)."""
        return slice(49, 53)

    def crs_code(self):
        """No description."""
        return slice(53, 56)

    def description(self):
        """Description used in LENNON."""
        return slice(56, 72)

    def spare(self):
        """No description."""
        return slice(72, 80)
+1 -1
View File
@@ -65,7 +65,7 @@ def fetch_nr_token(
def fetch_nr_timetable_files(
config: NRConfig | None = None, # pyright: ignore[reportRedeclaration]
token: str | None = None, # pyright: ignore[reportRedeclaration]
attempts: int = 1,
attempts: int = 3,
) -> ZipFile:
config: NRConfig = config if config is not None else NRConfig.from_env()
token: str = (
+41 -59
View File
@@ -11,10 +11,9 @@ Aimed primarily towards producing a reduced sqlite database.
# Imports
from itertools import pairwise
from pathlib import Path
import pandas as pd
import numpy as np
import sqlite3
import os
import pandas as pd
from pypdf import PageObject, PdfReader
@@ -149,67 +148,50 @@ def read_specification_table_raws(
return tables
def create_mca_specification_dbschema(
tables: dict[str, pd.DataFrame],
db_path: Path | None = None,
def generate_mca_stubs(
data_dir: Path | None = None,
):
db_path = (
db_path
if db_path is not None
else Path(os.environ.get("NR_DATADIR", "~/.cache/nr_data/timetable.db"))
)
db_path.parent.mkdir(exist_ok=True, parents=True)
connection = sqlite3.connect(db_path)
cursor = connection.cursor()
for name, df in tables.items():
if (_n := name.split("_"))[0] != "MCA" or len(_n) != 2:
text = [
"# This file is generated by parsing.generate_mca_stubs.",
"# Do not modify by hand.",
"",
"# Imports",
"from dataclasses import dataclass",
"",
"# Classes",
]
for name, df in read_specification_table_raws(data_dir).items():
if name[:4] != "MCA_":
continue
df["Start Index"] = df["Position"].apply(lambda s: int(s.split("-")[0]) - 1)
df["End Index"] = df["Position"].apply(lambda s: int(s.split("-")[-1]))
_ = cursor.execute(f"DROP TABLE IF EXISTS spec_{name.lower()}")
_ = cursor.execute(
f"""
CREATE TABLE spec_{name.lower()}
({", ".join([col.lower().replace(" ", "_") for col in df.columns])})
""",
)
_ = cursor.executemany(
f"""
INSERT INTO spec_{name.lower()}
VALUES({", ".join(["?" for _ in df.columns])})
""",
[list(row.values) for _, row in df.iterrows()],
)
connection.commit()
connection.close()
return db_path
text += [
"@dataclass",
f"class {name.split('_')[1]}:",
]
for _, row in df.iterrows():
func_name = (
row["Field Description"]
.lower()
.replace(" ", "_")
.replace("-", "_")
.split("/")[0]
.strip()
)
start = int(row["Position"].split("-")[0]) - 1
stop = int(row["Position"].split("-")[-1])
text += [
"",
f" def {func_name}(self):",
f' """{t if (t := str(row["Notes"])) != "nan" else "No description."}"""',
f" return slice({start}, {stop})",
]
text += ["", ""]
with open(Path(__file__).parent / "mca_stubs.py", "w") as wf:
return wf.write("\n".join(text))
# Script
def main(
skip_pdf: bool = False,
pdf_spec_path: Path | None = None,
raw_spec_dir: Path | None = None,
):
if not skip_pdf:
try:
tables = extract_specification_document_tables(pdf_spec_path)
_ = store_specification_table_raws(tables, raw_spec_dir)
except FileNotFoundError:
pass
try:
tables = read_specification_table_raws(raw_spec_dir)
except FileNotFoundError:
raise FileNotFoundError(
"The tables generated from the RSP's specification were not found. "
+ "This means neither the cached version nor the original .pdf is available. "
+ "Try suppling either to their default locations, or supplying custom directories. "
+ "Manual fix: extract_specification_document_tables then store_specification_table_raws. "
)
_ = create_mca_specification_dbschema(tables)
def main():
print(generate_mca_stubs())
if __name__ == "__main__":