Updated the raw_mca_... table generation to include each record's line number within the file and its schedule number. However, we may still need to investigate how the last entry behaves with 'ZZ' records (and any others), because we don't want to inherit the technical debt of having to remember this one special case every time.
This commit is contained in:
@@ -200,6 +200,7 @@ def create_mca_raw_dbtables(
|
|||||||
zipfile: ZipFile | None = None,
|
zipfile: ZipFile | None = None,
|
||||||
db_path: Path | None = None,
|
db_path: Path | None = None,
|
||||||
allow_fetch: bool = True,
|
allow_fetch: bool = True,
|
||||||
|
print_progress: bool = True,
|
||||||
) -> dict[str, str]:
|
) -> dict[str, str]:
|
||||||
db_path = _validate_db_path(db_path)
|
db_path = _validate_db_path(db_path)
|
||||||
if zipfile is None:
|
if zipfile is None:
|
||||||
@@ -240,7 +241,7 @@ def create_mca_raw_dbtables(
|
|||||||
new_columns = [
|
new_columns = [
|
||||||
col.split("/")[0].lower().replace(" ", "_").replace("-", "_")
|
col.split("/")[0].lower().replace(" ", "_").replace("-", "_")
|
||||||
for col in spec.field_description
|
for col in spec.field_description
|
||||||
]
|
] + ["line_number", "schedule_number"]
|
||||||
_ = cursor.execute(f"DROP TABLE IF EXISTS {mappings[name].lower()}")
|
_ = cursor.execute(f"DROP TABLE IF EXISTS {mappings[name].lower()}")
|
||||||
_ = cursor.execute(
|
_ = cursor.execute(
|
||||||
f"""
|
f"""
|
||||||
@@ -256,9 +257,11 @@ def create_mca_raw_dbtables(
|
|||||||
if name.split(".")[-1] == "MCA"
|
if name.split(".")[-1] == "MCA"
|
||||||
][0]
|
][0]
|
||||||
file = zipfile.open(zipinfo)
|
file = zipfile.open(zipinfo)
|
||||||
counter = 0
|
schedule_number, line_number = -1, -1
|
||||||
while (line := file.readline().decode()) != "":
|
while (line := file.readline().decode()) != "":
|
||||||
|
line_number += 1
|
||||||
record_type = line[:2]
|
record_type = line[:2]
|
||||||
|
schedule_number += int(record_type == "BS")
|
||||||
target_table = f"raw_mca_{record_type.lower()}"
|
target_table = f"raw_mca_{record_type.lower()}"
|
||||||
start_indexes = all_start_indexes.get(target_table)
|
start_indexes = all_start_indexes.get(target_table)
|
||||||
end_indexes = all_end_indexes.get(target_table)
|
end_indexes = all_end_indexes.get(target_table)
|
||||||
@@ -269,12 +272,13 @@ def create_mca_raw_dbtables(
|
|||||||
"'" + line[lb:ub].replace("'", "") + "'"
|
"'" + line[lb:ub].replace("'", "") + "'"
|
||||||
for lb, ub in zip(start_indexes, end_indexes, strict=True)
|
for lb, ub in zip(start_indexes, end_indexes, strict=True)
|
||||||
]
|
]
|
||||||
|
+ [f"{line_number}", f"{schedule_number}"]
|
||||||
)
|
)
|
||||||
_ = cursor.execute(f"INSERT INTO {target_table} VALUES({values})")
|
_ = cursor.execute(f"INSERT INTO {target_table} VALUES({values})")
|
||||||
counter += 1
|
if line_number % 3737 == 0 and print_progress:
|
||||||
if counter % 1111 == 0:
|
print(f" {line_number:9,} {line[:-1]}", end="\r")
|
||||||
print(f" {counter:,}", end="\r")
|
if print_progress:
|
||||||
print()
|
print()
|
||||||
connection.commit()
|
connection.commit()
|
||||||
connection.close()
|
connection.close()
|
||||||
return mappings
|
return mappings
|
||||||
@@ -304,7 +308,10 @@ def main(
|
|||||||
)
|
)
|
||||||
|
|
||||||
_ = create_mca_specification_dbtables(tables)
|
_ = create_mca_specification_dbtables(tables)
|
||||||
return create_mca_raw_dbtables()
|
st = np.datetime64("now")
|
||||||
|
result = create_mca_raw_dbtables()
|
||||||
|
print(np.datetime64("now") - st)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user