JSC
  • Index
  • Links
Jupyter-JSC
  1. 04-Tutorials
  2. In [ ]:
            # Press "Format notebook" or in the menu "Edit -> Apply ... Formatter"
        # useless comment
    import requests             # useless comment
    
    # HTTP header fields keyed by header name.
    # NOTE(review): presumably passed to a `requests` call elsewhere — confirm.
    headers = {
        "Referer": (
            "https://www.transtats.bts.gov/DL_SelectFields.asp"
            "?Table_ID=236&DB_Short_Name=On-Time"
        ),
        "Origin": "https://www.transtats.bts.gov",
        "Content-Type": "application/x-www-form-urlencoded",
    }

    # Ordered (name, value) pairs, all values kept as strings.
    params = (
        ("Table_ID", "236"),
        ("Has_Group", "3"),
        ("Is_Zipped", "0"),
    )
    
    # `os` is needed for makedirs below and is not imported in the visible
    # part of this script, so bring it into scope here.
    import os

    # Read the whole text file and drop leading/trailing whitespace.
    # NOTE(review): presumably the request body for the download — confirm.
    with open('modern-1-url.txt', encoding='utf-8') as f:
        data = f.read().strip()

    # Create the output directory; exist_ok=True makes re-running harmless.
    os.makedirs('data', exist_ok=True)
    
    
    import pandas as pd
    
    
    
    
    
    
    def read(fp):
        """Load a CSV file into a DataFrame and normalise its columns.

        Steps, in order:

        1. Read ``fp`` with ``pd.read_csv`` and lower-case every column name.
        2. Drop the ``'unnamed: 36'`` column.
        3. Pass the frame through ``extract_city_name``.
        4. Pass the frame through ``time_to_datetime`` together with the four
           time column names (helper defined outside this block; presumably
           it converts those columns to datetimes — confirm).
        5. Convert ``fl_date`` with ``pd.to_datetime`` and turn the carrier,
           airport, tail-number and cancellation-code columns into
           ``pd.Categorical``.

        Parameters
        ----------
        fp
            Anything ``pd.read_csv`` accepts as its first argument.

        Returns
        -------
        The cleaned DataFrame.
        """
        time_columns = ['dep_time', 'arr_time', 'crs_arr_time', 'crs_dep_time']
        categorical_columns = [
            'dest',
            'origin',
            'tail_num',
            'unique_carrier',
            'cancellation_code',
        ]

        frame = pd.read_csv(fp).rename(columns=str.lower)
        frame = frame.drop('unnamed: 36', axis=1)
        frame = extract_city_name(frame)
        frame = time_to_datetime(frame, time_columns)

        # Build the replacement columns in the same order the chain assigned them.
        converted = {'fl_date': pd.to_datetime(frame['fl_date'])}
        for column in categorical_columns:
            converted[column] = pd.Categorical(frame[column])

        return frame.assign(**converted)
    
    
    def extract_city_name(df: pd.DataFrame) -> pd.DataFrame:
        """Strip the state suffix from the origin and destination city names.

        ``"Chicago, IL"`` becomes ``"Chicago"`` in the ``origin_city_name``
        and ``dest_city_name`` columns. Values the pattern does not match
        come back as missing values, which is how ``Series.str.extract``
        reports a non-match.

        Parameters
        ----------
        df
            Frame containing string columns ``origin_city_name`` and
            ``dest_city_name``.

        Returns
        -------
        A copy of ``df`` with the two city columns replaced; the input frame
        is left unmodified.
        """
        cols = ['origin_city_name', 'dest_city_name']
        # Raw string: "\w" in a plain literal is an invalid escape sequence
        # and triggers a warning on recent Python versions.
        pattern = r"(.*), \w{2}"
        city = df[cols].apply(lambda x: x.str.extract(pattern, expand=False))
        df = df.copy()
        df[cols] = city
        return df
    
© Forschungszentrum Jülich Imprint Privacy Policy Support Terms of Service