From 51c4f5030cb1060086de0d05e500ac8225f926a9 Mon Sep 17 00:00:00 2001 From: Samuel Jones Date: Sat, 23 May 2026 10:38:03 +0100 Subject: [PATCH] Updated the raw_mca_... table generation to include line number from the file, and schedule number - although we may need to investigate how the last entry behaves with 'ZZ' records and any others. We don't want to inherit the technical debt of remembering this one case every time. --- src/national_rail_timetable/parsing.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/national_rail_timetable/parsing.py b/src/national_rail_timetable/parsing.py index 811b35c..5c56723 100644 --- a/src/national_rail_timetable/parsing.py +++ b/src/national_rail_timetable/parsing.py @@ -200,6 +200,7 @@ def create_mca_raw_dbtables( zipfile: ZipFile | None = None, db_path: Path | None = None, allow_fetch: bool = True, + print_progress: bool = True, ) -> dict[str, str]: db_path = _validate_db_path(db_path) if zipfile is None: @@ -240,7 +241,7 @@ def create_mca_raw_dbtables( new_columns = [ col.split("/")[0].lower().replace(" ", "_").replace("-", "_") for col in spec.field_description - ] + ] + ["line_number", "schedule_number"] _ = cursor.execute(f"DROP TABLE IF EXISTS {mappings[name].lower()}") _ = cursor.execute( f""" @@ -256,9 +257,11 @@ def create_mca_raw_dbtables( if name.split(".")[-1] == "MCA" ][0] file = zipfile.open(zipinfo) - counter = 0 + schedule_number, line_number = -1, -1 while (line := file.readline().decode()) != "": + line_number += 1 record_type = line[:2] + schedule_number += int(record_type == "BS") target_table = f"raw_mca_{record_type.lower()}" start_indexes = all_start_indexes.get(target_table) end_indexes = all_end_indexes.get(target_table) @@ -269,12 +272,13 @@ def create_mca_raw_dbtables( "'" + line[lb:ub].replace("'", "") + "'" for lb, ub in zip(start_indexes, end_indexes, strict=True) ] + + [f"{line_number}", f"{schedule_number}"] ) _ = cursor.execute(f"INSERT INTO {target_table} VALUES({values})") - counter += 1 - if counter % 1111 == 0: - print(f" {counter:,}", end="\r") - print() + if line_number % 3737 == 0 and print_progress: + print(f" {line_number:9,} {line[:-1]}", end="\r") + if print_progress: + print() connection.commit() connection.close() return mappings @@ -304,7 +308,10 @@ def main( ) _ = create_mca_specification_dbtables(tables) - return create_mca_raw_dbtables() + st = np.datetime64("now") + result = create_mca_raw_dbtables() + print(np.datetime64("now") - st) + return result if __name__ == "__main__":