If the source contains entries of different sizes (in this case, tab-separated entries), use the Line protocol handler to read the input and store each input line in a single tuple.

/// Reads the configured text file with the File transport and the Line
/// protocol handler. Each line read is delivered as one tuple whose only
/// attribute is the String attribute 'line'.
source := RETRIEVE({
    source      = 'source',
    transport   = 'File',
    protocol    = 'Line',
    datahandler = 'Tuple',
    options     = [['filename', '${WORKSPACEPROJECT}/source.txt']],
    schema      = [['line', 'String']]
  })

After that, split the input according to some condition with the ROUTE operator; in this example, depending on whether the line starts with I or with S:

/// Routes each tuple of 'source' by the first character of 'line'.
/// Output port 0 is used for the first predicate (line starts with "I"),
/// output port 1 for the second predicate (line starts with "S").
#PARSER PQL
#ADDQUERY
splitted = ROUTE({
    predicates = [
      'startsWith(line,"I")',
      'startsWith(line,"S")'
    ]
  },
  source
)
/// Takes output port 0 of 'splitted' and splits 'line' at tab characters.
/// The result of Split is stored under the attribute name 'line' again.
i_pre_out = MAP({
    expressions = [
      ['Split(line,"\t")', 'line']
    ]
  },
  0:splitted
)
            
/// Projects elements 0..6 of the split 'line' from 'i_pre_out' onto the
/// seven named attributes val1..val7.
i_out = MAP({
    expressions = [
      ['line[0]', 'val1'],
      ['line[1]', 'val2'],
      ['line[2]', 'val3'],
      ['line[3]', 'val4'],
      ['line[4]', 'val5'],
      ['line[5]', 'val6'],
      ['line[6]', 'val7']
    ]
  },
  i_pre_out
)
      
/// Takes output port 1 of 'splitted' and splits 'line' at tab characters.
/// The result of Split is stored under the attribute name 'line' again.
s_pre_out = MAP({
    expressions = [
      ['Split(line,"\t")', 'line']
    ]
  },
  1:splitted
)
            
/// Projects elements 0..5 of the split 'line' from 's_pre_out' onto the
/// six named attributes val1..val6. Element 4 is converted with toFloat;
/// the other elements are passed through unchanged.
s_out = MAP({
    expressions = [
      ['line[0]', 'val1'],
      ['line[1]', 'val2'],
      ['line[2]', 'val3'],
      ['line[3]', 'val4'],
      ['toFloat(line[4])', 'val5'],
      ['line[5]', 'val6']
    ]
  },
  s_pre_out
)

  • No labels