Page tree
Skip to end of metadata
Go to start of metadata

If the source contains entries of different sizes (in this case, tab-separated entries), use the Line protocol handler to read the input and store each line in one tuple.

/// Read the file source.txt from the workspace project with the Line protocol;
/// every input line is delivered as one tuple with a single String attribute 'line'.
source := RETRIEVE({
    source = 'source',
    transport = 'File',
    protocol = 'Line',
    datahandler = 'Tuple',
    options = [['filename', '${WORKSPACEPROJECT}/source.txt']],
    schema = [['line', 'String']]
  })

After that, split the input based on some condition with the ROUTE operator; in this example, depending on whether the line starts with I or with S:

/// Route each line of 'source' by its prefix: lines matching the first predicate
/// (starts with "I") leave on output port 0, lines matching the second predicate
/// (starts with "S") leave on output port 1.
#PARSER PQL
#ADDQUERY
splitted = ROUTE({predicates = ['startsWith(line,"I")', 'startsWith(line,"S")']}, source)
/// "I" branch (output port 0 of 'splitted'): split the raw line at tab characters,
/// replacing the attribute 'line' with the resulting list of fields.
i_pre_out = MAP({expressions = [['Split(line,"\t")','line']]}, 0:splitted)

/// Expose the first seven fields of the list as the attributes val1 .. val7.
i_out = MAP({
    expressions = [
      ['line[0]','val1'], ['line[1]','val2'], ['line[2]','val3'], ['line[3]','val4'],
      ['line[4]','val5'], ['line[5]','val6'], ['line[6]','val7']
    ]
  },
  i_pre_out
)
      
/// "S" branch (output port 1 of 'splitted'): split the raw line at tab characters,
/// replacing the attribute 'line' with the resulting list of fields.
s_pre_out = MAP({expressions = [['Split(line,"\t")','line']]}, 1:splitted)

/// Expose the first six fields as val1 .. val6; the fifth field is converted with toFloat.
s_out = MAP({
    expressions = [
      ['line[0]','val1'], ['line[1]','val2'], ['line[2]','val3'], ['line[3]','val4'],
      ['toFloat(line[4])','val5'],
      ['line[5]','val6']
    ]
  },
  s_pre_out
)

  • No labels