diff --git a/data/specification_tables/MCA_AA.csv b/data/specification_tables/MCA_AA.csv new file mode 100644 index 0000000..53d59e2 --- /dev/null +++ b/data/specification_tables/MCA_AA.csv @@ -0,0 +1,17 @@ +Field,Field Description,Length,Position,Notes +1,Record Identity,2,1-2,With the constant value ‘AA’. +2,Transaction Type,1,3-3,‘N’ = New. ‘D’ = Delete. ‘R’ = Revise. +3,Base UID,6,4-9,"One of the trains involved in the association. This will always be the through train, not the splitting/joining portion." +4,Assoc UID,6,10-15,The other train involved. +5,Assoc Start date,6,16-21,Format: yymmdd. May not be the same as the dates of the train schedules. +6,Assoc End date,6,22-27,Format: yymmdd. May not be the same as the dates of the train schedules. +7,Assoc Days,7,28-34, +8,Assoc Cat,2,35-36,"The ASSOC-CAT for the base UID (first byte), followed by the ASSOC-CAT for the assoc. UID (second byte). Note: Although this field isn’t specified as having blanks in the Network Rail CIF specification, if blanks are supplied they will be carried forward. (Blanks are used to override the permanent value in overlays and cancellations). ‘JJ’ for Joining trains and ‘VV’ for Dividing trains. ‘NP’ for Next/Previous Associations may also be displayed but as this is an Operating association it should be ignored by journey planners." +9,Assoc Date Ind,1,37-37,"‘S’ = Standard. ‘N’ = Over-next-midnight. ‘P’ = Over-previous-midnight. Note: Although this field isn’t specified as having blanks in the Network Rail CIF specification, if blanks are supplied they will be carried forward. (Blanks are used to override the permanent value in overlays and cancellations)." +10,Assoc Location,7,38-44,TIPLOC where association occurs. +11,Base Location Suffix,1,45-45,Values are space or 2. +12,Assoc Location Suffix,1,46-46,Values are space or 2. +13,Diagram Type,1,47-47,With the constant value ‘T’. +14,Association Type,1,48-48,"‘P’ = Passenger use. ‘O’ = Operating use. Note: Although this field isn’t specified as having blanks in the Network Rail CIF specification, if blanks are supplied they will be carried forward. (If blank then association defaults to Operating and should be ignored by journey planners)." +15,Filler,31,49-79, +16,STP indicator,1,80-80,Read in conjunction with the ‘Transaction Type’ in Field 2. ‘C’ = STP cancellation of permanent schedule. ‘N’ = New STP schedule. ‘O’ = STP overlay of permanent schedule. ‘P’ = Permanent. diff --git a/data/specification_tables/MCA_BS.csv b/data/specification_tables/MCA_BS.csv new file mode 100644 index 0000000..0fa123d --- /dev/null +++ b/data/specification_tables/MCA_BS.csv @@ -0,0 +1,27 @@ +Field,Field Description,Length,Position,Notes +1,Record Identity,2,1-2,With the constant value ‘BS’. +2,Transaction Type,1,3-3,‘N’ = New. ‘D’ = Delete. ‘R’ = Revise. +3,Train UID,6,4-9,Unique train Identifier. +4,Date Runs From,6,10-15,yymmdd +5,Date Runs To,6,16-21,yymmdd +6,Days Run,7,22-28, +7,Bank Holiday Running,1,29-29, +8,Train Status,1,30-30, +9,Train Category,2,31-32, +10,Train Identity,4,33-36, +11,Headcode,4,37-40, +12,Course Indicator,1,41-41,Not used - always set to 1. +13,Profit Centre Code/ Train Service Code,8,42-49, +14,Business Sector,1,50-50,Now used to contain the portion suffix for RSID +15,Power Type,3,51-53, +16,Timing Load,4,54-57, +17,Speed,3,58-60, +18,Operating Chars,6,61-66, +19,Train Class,1,67-67, +20,Sleepers,1,68-68, +21,Reservations,1,69-69,Permitted values are: A – Seat Reservations Compulsory (R symbol in white box) E – Reservations for Bicycles Essential (Inverted black triangle) R – Seat Reservations Recommended (R symbol in black box) S – Seat Reservations possible from any station (white diamond symbol) +22,Connect Indicator,1,70-70,Not used - always set to blank. +23,Catering Code,4,71-74, +24,Service Branding,4,75-78, +25,Spare,1,79-79, +26,STP indicator,1,80-80,‘C’ = STP cancellation of permanent schedule. ‘N’ = New STP schedule. ‘O’ = STP overlay of permanent schedule. ‘P’ = Permanent. Read in association with the Transaction Type in Field 2 diff --git a/data/specification_tables/MCA_BX.csv b/data/specification_tables/MCA_BX.csv new file mode 100644 index 0000000..a191624 --- /dev/null +++ b/data/specification_tables/MCA_BX.csv @@ -0,0 +1,9 @@ +Field,Field Description,Length,Position,Notes +1,Record Identity,2,1-2,With the constant value ‘BX’. +2,Traction Class,4,3-6,Not used – always blank. +3,UIC Code,5,7-11,"Only populated for trains travelling to/from Europe via the Channel Tunnel, otherwise blank." +4,ATOC Code,2,12-13, +5,Applicable Timetable Code,1,14-14,Always set to ‘Y’. +6,Retail Service ID,8,15-22, +7,Source,1,23-23,Not used – always blank. +8,Spare,57,24-80, diff --git a/data/specification_tables/MCA_CR.csv b/data/specification_tables/MCA_CR.csv new file mode 100644 index 0000000..797ed99 --- /dev/null +++ b/data/specification_tables/MCA_CR.csv @@ -0,0 +1,23 @@ +Field,Field Description,Length,Position,Notes +1,Record Identity,2,1-2,With the constant value ‘CR’. +2,Location,8,3-10,TIPLOC + Suffix. Suffix is always the eighth character. +3,Train Category,2,11-12, +4,Train Identity,4,13-16, +5,Headcode,4,17-20, +6,Course Indicator,1,21-21, +7,Profit Centre Code/ Train Service Code,8,22-29, +8,Business Sector,1,30-30, +9,Power Type,3,31-33, +10,Timing Load,4,34-37, +11,Speed,3,38-40, +12,Operating Chars,6,41-46, +13,Train Class,1,47-47, +14,Sleepers,1,48-48, +15,Reservations,1,49-49, +16,Connect Indicator,1,50-50, +17,Catering Code,4,51-54, +18,Service Branding,4,55-58, +19,Traction Class,4,59-62, +20,UIC Code,5,63-67,"Only populated for trains travelling to/from Europe via the Channel Tunnel, otherwise blank." +21,Retail Service ID,8,68-75, +22,Spare,5,76-80, diff --git a/data/specification_tables/MCA_HD.csv b/data/specification_tables/MCA_HD.csv new file mode 100644 index 0000000..b142a5b --- /dev/null +++ b/data/specification_tables/MCA_HD.csv @@ -0,0 +1,12 @@ +Field,Field Description,Length,Position,Notes +1,Record Identity,2,1-2,With the constant value ‘HD’. +2,File Identity,20,3-22, +3,Date of Extract,6,23-28,Format ddmmyy defining the date that the BTD extract file was created. +4,Time of Extract,4,29-32,hhmm defining the time that the BTD extract file was created. +5,Current File Reference,7,33-39,Unique file reference. +6,Last-file-reference,7,40-46,Unique file reference. +7,Update Indicator,1,47,‘U’=Update. ‘F’=Full extract. +8,Version,1,48,Version identifier of CIF software. +9,Extract start date,6,49-54,Same as Field 3 above. +10,Extract end date,6,55-60, +11,Spare,20,61-80, diff --git a/data/specification_tables/MCA_LI.csv b/data/specification_tables/MCA_LI.csv new file mode 100644 index 0000000..1ddfbf4 --- /dev/null +++ b/data/specification_tables/MCA_LI.csv @@ -0,0 +1,16 @@ +Field,Field Description,Length,Position,Notes +1,Record Identity,2,1-2,With the constant value ‘LI’. +2,Location,8,3-10,TIPLOC + Suffix. Suffix is always the eighth character. +3,Scheduled Arrival Time,5,11-15, +4,Scheduled Departure Time,5,16-20, +5,Scheduled Pass,5,21-25, +6,Public Arrival,4,26-29,If there is no Public Arrival time this field will default to 0000. +7,Public Departure,4,30-33,If there is no Public Departure time this field will default to 0000. +8,Platform,3,34-36, +9,Line,3,37-39, +10,Path,3,40-42, +11,Activity,12,43-54,Up to 6 activity codes may be present. +12,Engineering Allowance,2,55-56, +13,Pathing Allowance,2,57-58, +14,Performance Allowance,2,59-60, +15,Spare,20,61-80, diff --git a/data/specification_tables/MCA_LO.csv b/data/specification_tables/MCA_LO.csv new file mode 100644 index 0000000..6b004ff --- /dev/null +++ b/data/specification_tables/MCA_LO.csv @@ -0,0 +1,12 @@ +Field,Field Description,Length,Position,Notes +1,Record Identity,2,1-2,With the constant value ‘LO’. +2,Location,8,3-10,TIPLOC + Suffix. Suffix is always the eighth character. +3,Scheduled Departure Time,5,11-15, +4,Public Departure Time,4,16-19,If there is no Public Departure time this field will default to 0000. +5,Platform,3,20-22, +6,Line,3,23-25, +7,Engineering Allowance,2,26-27, +8,Pathing Allowance,2,28-29, +9,Activity,12,30-41,"Up to 6 activity codes may be present. The first 2 characters will always be TB (train begins). If there are no other activity codes, this defaults to being an advertised departure." +10,Performance Allowance,2,42-43, +11,Spare,37,44-80, diff --git a/data/specification_tables/MCA_LT.csv b/data/specification_tables/MCA_LT.csv new file mode 100644 index 0000000..99a2b5f --- /dev/null +++ b/data/specification_tables/MCA_LT.csv @@ -0,0 +1,9 @@ +Field,Field Description,Length,Position,Notes +1,Record Identity,2,1-2,With the constant value ‘LT’. +2,Location,8,3-10,TIPLOC +Suffix. Suffix is always the eighth character. +3,Scheduled Arrival Time,5,11-15, +4,Public Arrival Time,4,16-19,If there is no Public Arrival time this field will default to 0000. +5,Platform,3,20-22, +6,Path,3,23-25, +7,Activity,12,26-37,"Up to 6 activity codes may be present. The first 2 characters will always be TF (train finishes). If there are no other activity codes, this defaults to being an advertised arrival." +8,Spare,43,38-80, diff --git a/data/specification_tables/MCA_TA.csv b/data/specification_tables/MCA_TA.csv new file mode 100644 index 0000000..71abbd5 --- /dev/null +++ b/data/specification_tables/MCA_TA.csv @@ -0,0 +1,13 @@ +Field,Field Description,Length,Position,Notes +1,Record Identity,2,1-2,With the constant value ‘TA’. +2,TIPLOC code,7,3-9,A TIPLOC is 4-7 characters. If less than 7 then it will be padded by blanks. +3,Capitals,2,10-11,Defines capitalisation of TIPLOC. Can be ignored for retailing/journey planners. +4,National Location Code,6,12-17, +5,NLC Check Character,1,18-18, +6,TPS Description,26,19-44, +7,STANOX,5,45-49,TOPS location code. +8,PO MCP Code,4,50-53,Post Office Location Code. (Not used but may contain historic data or three blank spaces followed by 0). +9,CRS Code,3,54-56, +10,Description,16,57-72,Description used in LENNON. +11,New TIPLOC,7,73-79,Only present if TIPLOC change. +12,Spare,1,80-80, diff --git a/data/specification_tables/MCA_TD.csv b/data/specification_tables/MCA_TD.csv new file mode 100644 index 0000000..19ad542 --- /dev/null +++ b/data/specification_tables/MCA_TD.csv @@ -0,0 +1,4 @@ +Field,Field Description,Length,Position,Notes +1,Record Identity,2,1-2,With the constant value ‘TD’. +2,TIPLOC code,7,3-9, +3,Spare,71,10-80, diff --git a/data/specification_tables/MCA_TI.csv b/data/specification_tables/MCA_TI.csv new file mode 100644 index 0000000..22a29fb --- /dev/null +++ b/data/specification_tables/MCA_TI.csv @@ -0,0 +1,12 @@ +Field,Field Description,Length,Position,Notes +1,Record Identity,2,1-2,With the constant value ‘TI’. +2,TIPLOC code,7,3-9,A TIPLOC is 4-7 characters. If less than 7 then it will be padded by blanks. +3,Capitals,2,10-11,Defines capitalisation of TIPLOC. Can be ignored for retailing/journey planners. +4,National Location Code,6,12-17, +5,NLC Check Character,1,18-18, +6,TPS Description,26,19-44, +7,STANOX,5,45-49,TOPS location code. +8,PO MCP Code,4,50-53,Post Office Location Code. (Not used but may contain historic data or three blank spaces followed by 0). +9,CRS Code,3,54-56, +10,Description,16,57-72,Description used in LENNON. +11,Spare,8,73-80, diff --git a/data/specification_tables/MCA_ZZ.csv b/data/specification_tables/MCA_ZZ.csv new file mode 100644 index 0000000..ab52b0b --- /dev/null +++ b/data/specification_tables/MCA_ZZ.csv @@ -0,0 +1,3 @@ +Field,Field Description,Length,Position,Notes +1,Record Identity,2,1-2,With the constant value ‘ZZ’. +2,Spare,78,3-80, diff --git a/src/national_rail_timetable/__main__.py b/src/national_rail_timetable/__main__.py index c332b91..b6f6d71 100644 --- a/src/national_rail_timetable/__main__.py +++ b/src/national_rail_timetable/__main__.py @@ -1,6 +1,12 @@ from national_rail_timetable.nr_requests import fetch_nr_token, fetch_nr_timetable_files -from national_rail_timetable.parsing import extract_specification_document_tables +from national_rail_timetable.parsing import ( + extract_specification_document_tables, + store_specification_table_raws, + read_specification_table_raws, +) # print(fetch_nr_token()) # print(fetch_nr_timetable_files()) -print(extract_specification_document_tables()) +tables = extract_specification_document_tables() +print(store_specification_table_raws(tables)) +print(read_specification_table_raws()) diff --git a/src/national_rail_timetable/parsing.py b/src/national_rail_timetable/parsing.py index 8e7f069..d732fee 100644 --- a/src/national_rail_timetable/parsing.py +++ b/src/national_rail_timetable/parsing.py @@ -30,13 +30,20 @@ SPECIFICATION_TABLE_LOCATIONS = { "MCA_TD": (23, 0), "MCA_ZZ": (23, 1), } +DEFAULT_RAW_SPEC_DATA_DIR = Path(__file__).parents[2] / "data/specification_tables" # Functions # TODO: Implement better header check and row breaks to ingest .ALF and others. +# WARN: Row breaks may present an issue in that they may not be determinable generically. def extract_specification_document_tables( path: Path | None = None, # pyright: ignore[reportRedeclaration] ) -> dict[str, pd.DataFrame]: + """ + Given the path to RSP's Timetable Specification document, + read and extract (currently just .MCA's) descriptor tables. + Returns a dictionary of pandas DataFrames by _. + """ path: Path = ( path if path is not None @@ -114,3 +121,26 @@ def extract_specification_document_tables( ) return tables + + +def store_specification_table_raws( + tables: dict[str, pd.DataFrame], + data_dir: Path | None = None, +): + data_dir = data_dir if data_dir is not None else DEFAULT_RAW_SPEC_DATA_DIR + data_dir.mkdir(exist_ok=True, parents=True) + for name, df in tables.items(): + df.to_csv(data_dir / f"{name}.csv", index=False) + return data_dir + + +def read_specification_table_raws( + data_dir: Path | None = None, +) -> dict[str, pd.DataFrame]: + data_dir = data_dir if data_dir is not None else DEFAULT_RAW_SPEC_DATA_DIR + tables = {} + for path in data_dir.iterdir(): + if path.is_dir() or path.name[-4:] != ".csv": + continue + tables[path.name[:-4]] = pd.read_csv(path) + return tables