The tables parsed from RSP's specification are now cached to /data. This means that we can ship the tables with the project, rather than requiring the .pdf in order to use it.
This commit is contained in:
@@ -0,0 +1,17 @@
|
|||||||
|
Field,Field Description,Length,Position,Notes
|
||||||
|
1,Record Identity,2,1-2,With the constant value ‘AA’.
|
||||||
|
2,Transaction Type,1,3-3,‘N’ = New. ‘D’ = Delete. ‘R’ = Revise.
|
||||||
|
3,Base UID,6,4-9,"One of the trains involved in the association. This will always be the through train, not the splitting/joining portion."
|
||||||
|
4,Assoc UID,6,10-15,The other train involved.
|
||||||
|
5,Assoc Start date,6,16-21,Format: yymmdd. May not be the same as the dates of the train schedules.
|
||||||
|
6,Assoc End date,6,22-27,Format: yymmdd. May not be the same as the dates of the train schedules.
|
||||||
|
7,Assoc Days,7,28-34,
|
||||||
|
8,Assoc Cat,2,35-36,"The ASSOC-CAT for the base UID (first byte), followed by the ASSOC-CAT for the assoc. UID (second byte). Note: Although this field isn’t specified as having blanks in the Network Rail CIF specification, if blanks are supplied they will be carried forward. (Blanks are used to override the permanent value in overlays and cancellations). ‘JJ’ for Joining trains and ‘VV’ for Dividing trains. ‘NP’ for Next/Previous Associations may also be displayed but as this is an Operating association it should be ignored by journey planners."
|
||||||
|
9,Assoc Date Ind,1,37-37,"‘S’ = Standard. ‘N’ = Over-next-midnight. ‘P’ = Over-previous-midnight. Note: Although this field isn’t specified as having blanks in the Network Rail CIF specification, if blanks are supplied they will be carried forward. (Blanks are used to override the permanent value in overlays and cancellations)."
|
||||||
|
10,Assoc Location,7,38-44,TIPLOC where association occurs.
|
||||||
|
11,Base Location Suffix,1,45-45,Values are space or 2.
|
||||||
|
12,Assoc Location Suffix,1,46-46,Values are space or 2.
|
||||||
|
13,Diagram Type,1,47-47,With the constant value ‘T’.
|
||||||
|
14,Association Type,1,48-48,"‘P’ = Passenger use. ‘O’ = Operating use. Note: Although this field isn’t specified as having blanks in the Network Rail CIF specification, if blanks are supplied they will be carried forward. (If blank then association defaults to Operating and should be ignored by journey planners)."
|
||||||
|
15,Filler,31,49-79,
|
||||||
|
16,STP indicator,1,80-80,Read in conjunction with the ‘Transaction Type’ in Field 2. ‘C’ = STP cancellation of permanent schedule. ‘N’ = New STP schedule. ‘O’ = STP overlay of permanent schedule. ‘P’ = Permanent.
|
||||||
|
@@ -0,0 +1,27 @@
|
|||||||
|
Field,Field Description,Length,Position,Notes
|
||||||
|
1,Record Identity,2,1-2,With the constant value ‘BS’.
|
||||||
|
2,Transaction Type,1,3-3,‘N’ = New. ‘D’ = Delete. ‘R’ = Revise.
|
||||||
|
3,Train UID,6,4-9,Unique train Identifier.
|
||||||
|
4,Date Runs From,6,10-15,yymmdd
|
||||||
|
5,Date Runs To,6,16-21,yymmdd
|
||||||
|
6,Days Run,7,22-28,
|
||||||
|
7,Bank Holiday Running,1,29-29,
|
||||||
|
8,Train Status,1,30-30,
|
||||||
|
9,Train Category,2,31-32,
|
||||||
|
10,Train Identity,4,33-36,
|
||||||
|
11,Headcode,4,37-40,
|
||||||
|
12,Course Indicator,1,41-41,Not used - always set to 1.
|
||||||
|
13,Profit Centre Code/ Train Service Code,8,42-49,
|
||||||
|
14,Business Sector,1,50-50,Now used to contain the portion suffix for RSID
|
||||||
|
15,Power Type,3,51-53,
|
||||||
|
16,Timing Load,4,54-57,
|
||||||
|
17,Speed,3,58-60,
|
||||||
|
18,Operating Chars,6,61-66,
|
||||||
|
19,Train Class,1,67-67,
|
||||||
|
20,Sleepers,1,68-68,
|
||||||
|
21,Reservations,1,69-69,Permitted values are: A – Seat Reservations Compulsory (R symbol in white box). E – Reservations for Bicycles Essential (Inverted black triangle). R – Seat Reservations Recommended (R symbol in black box). S – Seat Reservations possible from any station (white diamond symbol).
|
||||||
|
22,Connect Indicator,1,70-70,Not used - always set to blank.
|
||||||
|
23,Catering Code,4,71-74,
|
||||||
|
24,Service Branding,4,75-78,
|
||||||
|
25,Spare,1,79-79,
|
||||||
|
26,STP indicator,1,80-80,‘C’ = STP cancellation of permanent schedule. ‘N’ = New STP schedule. ‘O’ = STP overlay of permanent schedule. ‘P’ = Permanent. Read in association with the Transaction Type in Field 2
|
||||||
|
@@ -0,0 +1,9 @@
|
|||||||
|
Field,Field Description,Length,Position,Notes
|
||||||
|
1,Record Identity,2,1-2,With the constant value ‘BX’.
|
||||||
|
2,Traction Class,4,3-6,Not used – always blank.
|
||||||
|
3,UIC Code,5,7-11,"Only populated for trains travelling to/from Europe via the Channel Tunnel, otherwise blank."
|
||||||
|
4,ATOC Code,2,12-13,
|
||||||
|
5,Applicable Timetable Code,1,14-14,Always set to ‘Y’.
|
||||||
|
6,Retail Service ID,8,15-22,
|
||||||
|
7,Source,1,23-23,Not used – always blank.
|
||||||
|
8,Spare,57,24-80,
|
||||||
|
@@ -0,0 +1,23 @@
|
|||||||
|
Field,Field Description,Length,Position,Notes
|
||||||
|
1,Record Identity,2,1-2,With the constant value ‘CR’.
|
||||||
|
2,Location,8,3-10,TIPLOC + Suffix. Suffix is always the eighth character.
|
||||||
|
3,Train Category,2,11-12,
|
||||||
|
4,Train Identity,4,13-16,
|
||||||
|
5,Headcode,4,17-20,
|
||||||
|
6,Course Indicator,1,21-21,
|
||||||
|
7,Profit Centre Code/ Train Service Code,8,22-29,
|
||||||
|
8,Business Sector,1,30-30,
|
||||||
|
9,Power Type,3,31-33,
|
||||||
|
10,Timing Load,4,34-37,
|
||||||
|
11,Speed,3,38-40,
|
||||||
|
12,Operating Chars,6,41-46,
|
||||||
|
13,Train Class,1,47-47,
|
||||||
|
14,Sleepers,1,48-48,
|
||||||
|
15,Reservations,1,49-49,
|
||||||
|
16,Connect Indicator,1,50-50,
|
||||||
|
17,Catering Code,4,51-54,
|
||||||
|
18,Service Branding,4,55-58,
|
||||||
|
19,Traction Class,4,59-62,
|
||||||
|
20,UIC Code,5,63-67,"Only populated for trains travelling to/from Europe via the Channel Tunnel, otherwise blank."
|
||||||
|
21,Retail Service ID,8,68-75,
|
||||||
|
22,Spare,5,76-80,
|
||||||
|
@@ -0,0 +1,12 @@
|
|||||||
|
Field,Field Description,Length,Position,Notes
|
||||||
|
1,Record Identity,2,1-2,With the constant value ‘HD’.
|
||||||
|
2,File Identity,20,3-22,
|
||||||
|
3,Date of Extract,6,23-28,Format ddmmyy defining the date that the BTD extract file was created.
|
||||||
|
4,Time of Extract,4,29-32,hhmm defining the time that the BTD extract file was created.
|
||||||
|
5,Current File Reference,7,33-39,Unique file reference.
|
||||||
|
6,Last-file-reference,7,40-46,Unique file reference.
|
||||||
|
7,Update Indicator,1,47-47,‘U’=Update. ‘F’=Full extract.
|
||||||
|
8,Version,1,48-48,Version identifier of CIF software.
|
||||||
|
9,Extract start date,6,49-54,Same as Field 3 above.
|
||||||
|
10,Extract end date,6,55-60,
|
||||||
|
11,Spare,20,61-80,
|
||||||
|
@@ -0,0 +1,16 @@
|
|||||||
|
Field,Field Description,Length,Position,Notes
|
||||||
|
1,Record Identity,2,1-2,With the constant value ‘LI’.
|
||||||
|
2,Location,8,3-10,TIPLOC + Suffix. Suffix is always the eighth character.
|
||||||
|
3,Scheduled Arrival Time,5,11-15,
|
||||||
|
4,Scheduled Departure Time,5,16-20,
|
||||||
|
5,Scheduled Pass,5,21-25,
|
||||||
|
6,Public Arrival,4,26-29,If there is no Public Arrival time this field will default to 0000.
|
||||||
|
7,Public Departure,4,30-33,If there is no Public Departure time this field will default to 0000.
|
||||||
|
8,Platform,3,34-36,
|
||||||
|
9,Line,3,37-39,
|
||||||
|
10,Path,3,40-42,
|
||||||
|
11,Activity,12,43-54,Up to 6 activity codes may be present.
|
||||||
|
12,Engineering Allowance,2,55-56,
|
||||||
|
13,Pathing Allowance,2,57-58,
|
||||||
|
14,Performance Allowance,2,59-60,
|
||||||
|
15,Spare,20,61-80,
|
||||||
|
@@ -0,0 +1,12 @@
|
|||||||
|
Field,Field Description,Length,Position,Notes
|
||||||
|
1,Record Identity,2,1-2,With the constant value ‘LO’.
|
||||||
|
2,Location,8,3-10,TIPLOC + Suffix. Suffix is always the eighth character.
|
||||||
|
3,Scheduled Departure Time,5,11-15,
|
||||||
|
4,Public Departure Time,4,16-19,If there is no Public Departure time this field will default to 0000.
|
||||||
|
5,Platform,3,20-22,
|
||||||
|
6,Line,3,23-25,
|
||||||
|
7,Engineering Allowance,2,26-27,
|
||||||
|
8,Pathing Allowance,2,28-29,
|
||||||
|
9,Activity,12,30-41,"Up to 6 activity codes may be present. The first 2 characters will always be TB (train begins). If there are no other activity codes, this defaults to being an advertised departure."
|
||||||
|
10,Performance Allowance,2,42-43,
|
||||||
|
11,Spare,37,44-80,
|
||||||
|
@@ -0,0 +1,9 @@
|
|||||||
|
Field,Field Description,Length,Position,Notes
|
||||||
|
1,Record Identity,2,1-2,With the constant value ‘LT’.
|
||||||
|
2,Location,8,3-10,TIPLOC + Suffix. Suffix is always the eighth character.
|
||||||
|
3,Scheduled Arrival Time,5,11-15,
|
||||||
|
4,Public Arrival Time,4,16-19,If there is no Public Arrival time this field will default to 0000.
|
||||||
|
5,Platform,3,20-22,
|
||||||
|
6,Path,3,23-25,
|
||||||
|
7,Activity,12,26-37,"Up to 6 activity codes may be present. The first 2 characters will always be TF (train finishes). If there are no other activity codes, this defaults to being an advertised arrival."
|
||||||
|
8,Spare,43,38-80,
|
||||||
|
@@ -0,0 +1,13 @@
|
|||||||
|
Field,Field Description,Length,Position,Notes
|
||||||
|
1,Record Identity,2,1-2,With the constant value ‘TA’.
|
||||||
|
2,TIPLOC code,7,3-9,A TIPLOC is 4-7 characters. If less than 7 then it will be padded by blanks.
|
||||||
|
3,Capitals,2,10-11,Defines capitalisation of TIPLOC. Can be ignored for retailing/journey planners.
|
||||||
|
4,National Location Code,6,12-17,
|
||||||
|
5,NLC Check Character,1,18-18,
|
||||||
|
6,TPS Description,26,19-44,
|
||||||
|
7,STANOX,5,45-49,TOPS location code.
|
||||||
|
8,PO MCP Code,4,50-53,Post Office Location Code. (Not used but may contain historic data or three blank spaces followed by 0).
|
||||||
|
9,CRS Code,3,54-56,
|
||||||
|
10,Description,16,57-72,Description used in LENNON.
|
||||||
|
11,New TIPLOC,7,73-79,Only present if TIPLOC change.
|
||||||
|
12,Spare,1,80-80,
|
||||||
|
@@ -0,0 +1,4 @@
|
|||||||
|
Field,Field Description,Length,Position,Notes
|
||||||
|
1,Record Identity,2,1-2,With the constant value ‘TD’.
|
||||||
|
2,TIPLOC code,7,3-9,
|
||||||
|
3,Spare,71,10-80,
|
||||||
|
@@ -0,0 +1,12 @@
|
|||||||
|
Field,Field Description,Length,Position,Notes
|
||||||
|
1,Record Identity,2,1-2,With the constant value ‘TI’.
|
||||||
|
2,TIPLOC code,7,3-9,A TIPLOC is 4-7 characters. If less than 7 then it will be padded by blanks.
|
||||||
|
3,Capitals,2,10-11,Defines capitalisation of TIPLOC. Can be ignored for retailing/journey planners.
|
||||||
|
4,National Location Code,6,12-17,
|
||||||
|
5,NLC Check Character,1,18-18,
|
||||||
|
6,TPS Description,26,19-44,
|
||||||
|
7,STANOX,5,45-49,TOPS location code.
|
||||||
|
8,PO MCP Code,4,50-53,Post Office Location Code. (Not used but may contain historic data or three blank spaces followed by 0).
|
||||||
|
9,CRS Code,3,54-56,
|
||||||
|
10,Description,16,57-72,Description used in LENNON.
|
||||||
|
11,Spare,8,73-80,
|
||||||
|
@@ -0,0 +1,3 @@
|
|||||||
|
Field,Field Description,Length,Position,Notes
|
||||||
|
1,Record Identity,2,1-2,With the constant value ‘ZZ’.
|
||||||
|
2,Spare,78,3-80,
|
||||||
|
@@ -1,6 +1,12 @@
|
|||||||
from national_rail_timetable.nr_requests import fetch_nr_token, fetch_nr_timetable_files
|
from national_rail_timetable.nr_requests import fetch_nr_token, fetch_nr_timetable_files
|
||||||
from national_rail_timetable.parsing import extract_specification_document_tables
|
from national_rail_timetable.parsing import (
|
||||||
|
extract_specification_document_tables,
|
||||||
|
store_specification_table_raws,
|
||||||
|
read_specification_table_raws,
|
||||||
|
)
|
||||||
|
|
||||||
# print(fetch_nr_token())
|
# print(fetch_nr_token())
|
||||||
# print(fetch_nr_timetable_files())
|
# print(fetch_nr_timetable_files())
|
||||||
print(extract_specification_document_tables())
|
tables = extract_specification_document_tables()
|
||||||
|
print(store_specification_table_raws(tables))
|
||||||
|
print(read_specification_table_raws())
|
||||||
|
|||||||
@@ -30,13 +30,20 @@ SPECIFICATION_TABLE_LOCATIONS = {
|
|||||||
"MCA_TD": (23, 0),
|
"MCA_TD": (23, 0),
|
||||||
"MCA_ZZ": (23, 1),
|
"MCA_ZZ": (23, 1),
|
||||||
}
|
}
|
||||||
|
DEFAULT_RAW_SPEC_DATA_DIR = Path(__file__).parents[2] / "data/specification_tables"
|
||||||
|
|
||||||
|
|
||||||
# Functions
|
# Functions
|
||||||
# TODO: Implement better header check and row breaks to ingest .ALF and others.
|
# TODO: Implement better header check and row breaks to ingest .ALF and others.
|
||||||
|
# WARN: Row breaks may present an issue in that they may not be determinable generically.
|
||||||
def extract_specification_document_tables(
|
def extract_specification_document_tables(
|
||||||
path: Path | None = None, # pyright: ignore[reportRedeclaration]
|
path: Path | None = None, # pyright: ignore[reportRedeclaration]
|
||||||
) -> dict[str, pd.DataFrame]:
|
) -> dict[str, pd.DataFrame]:
|
||||||
|
"""
|
||||||
|
Given the path to RSP's Timetable Specification document,
|
||||||
|
read and extract (currently just .MCA's) descriptor tables.
|
||||||
|
Returns a dictionary of pandas DataFrames by <file>_<record>.
|
||||||
|
"""
|
||||||
path: Path = (
|
path: Path = (
|
||||||
path
|
path
|
||||||
if path is not None
|
if path is not None
|
||||||
@@ -114,3 +121,26 @@ def extract_specification_document_tables(
|
|||||||
)
|
)
|
||||||
|
|
||||||
return tables
|
return tables
|
||||||
|
|
||||||
|
|
||||||
|
def store_specification_table_raws(
|
||||||
|
tables: dict[str, pd.DataFrame],
|
||||||
|
data_dir: Path | None = None,
|
||||||
|
):
|
||||||
|
data_dir = data_dir if data_dir is not None else DEFAULT_RAW_SPEC_DATA_DIR
|
||||||
|
data_dir.mkdir(exist_ok=True, parents=True)
|
||||||
|
for name, df in tables.items():
|
||||||
|
df.to_csv(data_dir / f"{name}.csv", index=False)
|
||||||
|
return data_dir
|
||||||
|
|
||||||
|
|
||||||
|
def read_specification_table_raws(
|
||||||
|
data_dir: Path | None = None,
|
||||||
|
) -> dict[str, pd.DataFrame]:
|
||||||
|
data_dir = data_dir if data_dir is not None else DEFAULT_RAW_SPEC_DATA_DIR
|
||||||
|
tables = {}
|
||||||
|
for path in data_dir.iterdir():
|
||||||
|
if path.is_dir() or path.name[-4:] != ".csv":
|
||||||
|
continue
|
||||||
|
tables[path.name[:-4]] = pd.read_csv(path)
|
||||||
|
return tables
|
||||||
|
|||||||
Reference in New Issue
Block a user