#IFSRCNDEF taxi_unwrapped 
#PARSER PQL
#METADATA TimeInterval
#METADATA Latency
/// 500m south -> -0.004491556 latitude
/// 250m south -> -0.002245778 latitude
/// 500m east -> +0.005986 longitude
/// 250m east -> +0.002993 longitude
#DEFINE dLatitude 0.002245778
#DEFINE dLongitude 0.002993
/// first cell center: 41.474937 -74.913585
/// north border: 41.479428556
/// south border: 40.131961756
/// west border: -74.919571
/// east border: -73.123771
#DEFINE border_north 41.479428556
#DEFINE border_south 40.131961756
#DEFINE border_west -74.919571
#DEFINE border_east -73.123771
#DEFINE InputFile ${WORKSPACEPROJECT}/sorted_data.csv
#IFDEF FULL_DATA
#DEFINE InputFile C:/Data/GC/2015/sorted_data.csv
#ENDIF
#IFDEF RUNTEST
#IF RUNTEST == "1"
#DEFINE InputFile ${WORKSPACEPROJECT}/test cases/test01/test_01.csv
#ENDIF
#IF RUNTEST == "2"
#DEFINE InputFile ${WORKSPACEPROJECT}/test cases/test02/test_02.csv
#ENDIF
#IF RUNTEST == "3"
#DEFINE InputFile ${WORKSPACEPROJECT}/test cases/test03/test_03.csv
#ENDIF
#ENDIF
#RUNQUERY
taxi_unwrapped := CSVFILESOURCE({
            schema = [
              ['medallion','String'],              
              ['hack_license','String'], 
              ['pickup_datetime','String'],
              ['dropoff_datetime','String'],
              ['trip_time_in_secs','Integer'],
              ['trip_distance','Double'], 
              ['pickup_longitude','Double'],
              ['pickup_latitude','Double'], 
              ['dropoff_longitude','Double'],
              ['dropoff_latitude','Double'], 
              ['payment_type','String'], 
              ['fare_amount','Double'], 
              ['surcharge','Double'],
              ['mta_tax','Double'], 
              ['tip_amount','Double'], 
              ['tolls_amount','Double'], 
              ['total_amount','Double'] 
            ],
            filename = '${INPUTFILE}',
            source = 'taxi'
         }        
    )
#RUNQUERY
taxi := TIMESTAMP({START = 'dropoff_datetime_long'}, 
        Map({expressions = [
                'medallion',
                ['toDate(pickup_datetime,"yyyy-MM-dd HH:mm:ss")','pickup_datetime'],
                ['toDate(dropoff_datetime,"yyyy-MM-dd HH:mm:ss")','dropoff_datetime'],
                ['toLong(toDate(dropoff_datetime,"yyyy-MM-dd HH:mm:ss"))','dropoff_datetime_long'],
                ['toInteger(ceil((ceil((pickup_longitude - ${border_west})*10000) 
                                    / ceil(${dLongitude}*10000))-1))', 'pickup_cell_x'],
                ['toInteger(ceil((ceil((${border_north} - pickup_latitude)*10000) 
                                    / ceil(${dLatitude}*10000))-1))', 'pickup_cell_y'],
                ['toInteger(ceil((ceil((dropoff_longitude - ${border_west})*10000) 
                                    / ceil(${dLongitude}*10000))-1))', 'dropoff_cell_x'],
                ['toInteger(ceil((ceil((${border_north} - dropoff_latitude)*10000) 
                                    / ceil(${dLatitude}*10000))-1))', 'dropoff_cell_y'],
                'fare_amount',
                'tip_amount'
            ]},
        SELECT({
                predicate = 'pickup_latitude <= ${border_north} && pickup_latitude >= ${border_south}
                && pickup_longitude <= ${border_east} && pickup_longitude >= ${border_west}
                && dropoff_latitude <= ${border_north} && dropoff_latitude >= ${border_south}
                && dropoff_longitude <= ${border_east} && dropoff_longitude >= ${border_west}'},
        taxi_unwrapped)
     ))
     
#ENDIF
  • No labels