diff --git a/src/national_rail_timetable/parsing.py b/src/national_rail_timetable/parsing.py index 811b35c..5c56723 100644 --- a/src/national_rail_timetable/parsing.py +++ b/src/national_rail_timetable/parsing.py @@ -200,6 +200,7 @@ def create_mca_raw_dbtables( zipfile: ZipFile | None = None, db_path: Path | None = None, allow_fetch: bool = True, + print_progress: bool = True, ) -> dict[str, str]: db_path = _validate_db_path(db_path) if zipfile is None: @@ -240,7 +241,7 @@ def create_mca_raw_dbtables( new_columns = [ col.split("/")[0].lower().replace(" ", "_").replace("-", "_") for col in spec.field_description - ] + ] + ["line_number", "schedule_number"] _ = cursor.execute(f"DROP TABLE IF EXISTS {mappings[name].lower()}") _ = cursor.execute( f""" @@ -256,9 +257,11 @@ def create_mca_raw_dbtables( if name.split(".")[-1] == "MCA" ][0] file = zipfile.open(zipinfo) - counter = 0 + schedule_number, line_number = -1, -1 while (line := file.readline().decode()) != "": + line_number += 1 record_type = line[:2] + schedule_number += int(record_type == "BS") target_table = f"raw_mca_{record_type.lower()}" start_indexes = all_start_indexes.get(target_table) end_indexes = all_end_indexes.get(target_table) @@ -269,12 +272,13 @@ def create_mca_raw_dbtables( "'" + line[lb:ub].replace("'", "") + "'" for lb, ub in zip(start_indexes, end_indexes, strict=True) ] + + [f"{line_number}", f"{schedule_number}"] ) _ = cursor.execute(f"INSERT INTO {target_table} VALUES({values})") - counter += 1 - if counter % 1111 == 0: - print(f" {counter:,}", end="\r") - print() + if line_number % 3737 == 0 and print_progress: + print(f" {line_number:9,} {line[:-1]}", end="\r") + if print_progress: + print() connection.commit() connection.close() return mappings @@ -304,7 +308,10 @@ def main( ) _ = create_mca_specification_dbtables(tables) - return create_mca_raw_dbtables() + st = np.datetime64("now") + result = create_mca_raw_dbtables() + print(np.datetime64("now") - st) + return result if __name__ == "__main__":