Page History

There are two operations to realize a Recommender System with Odysseus:

Example how to use the operators

In this example, the MovieLens dataset is used. The column separator was changed to tab.

The file u_ordered.data is ordered by timestamp (this is not necessary, but it allows implementations to take advantage of temporal effects, e.g., concept drift). The file unique_temporal_ordered_users.data has only the user column of u_ordered.data; duplicates are removed. The file rfr.csv has a sample of the users in u_ordered.data. Both u_ordered.data and rfr.csv are attached.

Code Block

linenumbers	true

#PARSER CQL
/// Locations of the two input files read by the ACCESS operators further down:
/// the ordered ratings file and the sample of users requesting recommendations.
#DEFINE rating_data_input_file ${PROJECTPATH}/ml-100k/u_ordered.data
#DEFINE rfr_data_input_file ${PROJECTPATH}/ml-100k/rfr.csv


#PARSER PQL

#PARSER CQL
#RUNQUERY
/// Declares the stream ml100k (userid, itemid, rating, timestamp).
/// Tuples are pulled (GenericPull) from the tab-delimited CSV file u_ordered.data.
/// The statement is CQL, so the CQL parser is selected explicitly above.
/// NOTE(review): the unit of 'scheduler.delay' is not stated in this script -- confirm.
CREATE STREAM ml100k (userid Integer, itemid Integer, rating Double, timestamp Long)
   WRAPPER 'GenericPull'
   PROTOCOL 'CSV'
   TRANSPORT 'File'
   DATAHANDLER 'Tuple'
   OPTIONS (
      'filename' '${PROJECTPATH}/datasets/ml-100k/u_ordered.data',
      'delimiter' '\t'
      ,'scheduler.delay' '100'
   )

#RUNQUERY
/// Declares the stream ml100k_users with a single Integer attribute, userid.
/// Tuples are pulled (GenericPull) from the tab-delimited CSV file
/// unique_temporal_ordered_users.data.
/// NOTE(review): 'scheduler.delay' is 1000 here versus 100 for the ratings stream;
/// the unit is not stated in this script -- confirm.
CREATE STREAM ml100k_users (userid Integer)
   WRAPPER 'GenericPull'
   PROTOCOL 'CSV'
   TRANSPORT 'File'
   DATAHANDLER 'Tuple'
   OPTIONS (
      'filename' '${PROJECTPATH}/datasets/ml-100k/unique_temporal_ordered_users.data',
      'delimiter' '\t'
      ,'scheduler.delay' '1000'
   )

#PARSER PQL

#ADDQUERY
/// Learns recommendation models from the ml100k stream with the 'Mahout' learner,
/// configured with an SVDRecommender and an SVDPlusPlusFactorizer.
/// item, user and rating presumably name the input attributes that carry the
/// item id, user id and rating value -- verify against the operator documentation.
/// The result, recommendationModels, is consumed by the RECOMMENDATION query.
recommendationModels = RECOMMENDATION_LEARN(
   {
      item = 'itemid',
      user = 'userid',
      rating = 'rating',
      learner = 'Mahout',
      options = [
         'OptionRecommender'='SVDRecommender',
         'OptionFactorizer'='SVDPlusPlusFactorizer'
      ]
   },
   ml100k)

#ADDQUERY
/// Produces 5 recommendations per user: combines the ml100k_users stream with
/// the models from recommendationModels.
/// NOTE(review): recommender = 'recommender' presumably names the attribute that
/// carries the learned model -- confirm.
recommendations = RECOMMENDATION(
   {
      recommender = 'recommender',
      user = 'userid',
      no_of_recommendations = 5
   },
   ml100k_users,
   recommendationModels)

// A data stream of ratings.
// Defines the source rating_data: tuples (user, item, rating, timestamp) are
// pulled (GenericPull) from the tab-delimited CSV file named by rating_data_input_file.
// Note: options and schema are sibling parameters of ACCESS, although schema is
// indented one level deeper than options.
// NOTE(review): the 'StartTimeStamp' type presumably marks timestamp as the
// element's start time -- confirm.
rating_data := ACCESS({source='rating_data', wrapper='GenericPull', transport='File', protocol='CSV', datahandler='Tuple', 
	options=[
		['Delimiter', '\t'],
		['filename', '${rating_data_input_file}']],
		schema=[
			['user','Integer'], // some learners need Long instead of Integer
			['item','Integer'], // some learners need Long instead of Integer
			['rating','Double'],
			['timestamp','StartTimeStamp']
	]
})
 
#RUNQUERY
// A data stream of requests for recommendations (rfr) from users.
// Tuples (user, timestamp) are pulled (GenericPull) from the tab-delimited CSV
// file named by rfr_data_input_file and wrapped in a TIMEWINDOW of size 1.
// Note: options and schema are sibling parameters of ACCESS, although schema is
// indented one level deeper than options.
// NOTE(review): the unit of size = 1 is not given here -- confirm.
rfr := TIMEWINDOW({size = 1}, ACCESS({source='rfr', wrapper='GenericPull', transport='File', protocol='CSV', datahandler='Tuple', 
	options=[
		['Delimiter', '\t'], 
		['filename', '${rfr_data_input_file}']], 
		schema=[
			['user','Integer'], // some learners need Long instead of Integer
			['timestamp','StartTimeStamp']
		]
}))


#QNAME RecommenderSystem
#QUERY
/// Query RecommenderSystem: trains models on rating_data, answers the requests
/// arriving on rfr, and evaluates the models on the test part of the split.

/// split learning and test data
/// Output 0 of the split feeds learning and output 1 feeds evaluation (see below).
/// NOTE(review): the meaning of strategy 'ITTT' is not visible here -- confirm.
splitted_rating_data = EXTRACT_TEST_DATA({strategy = 'ITTT'}, rating_data)

/// continuous learning
/// Models are trained with the 'BRISMF.MOA' learner on a 30-day time window
/// over output 0 of the split.
windowed_learning_data = TIMEWINDOW({size = [30, 'days']}, 0:splitted_rating_data)
models = TRAIN_RECSYS_MODEL({learner = 'BRISMF.MOA'}, windowed_learning_data)

/// recommending
/// Requests are joined with the models to obtain candidates, ratings are predicted
/// for the candidates, and up to 8 items with a rating of at least 3.5 are kept
/// (top_n = 8, min_rating = 3.5).
/// NOTE(review): the candidates join reads 1:models while both PREDICT_RATING joins
/// read models without a port prefix -- confirm the difference is intended.
recomm_candidates = RECOMMENDATION_CANDIDATES(JOIN(rfr, 1:models))
predicted_candidates = PREDICT_RATING(JOIN(models, recomm_candidates))
recommendations = RECOMMEND({top_n = 8, min_rating = 3.5}, predicted_candidates)

/// evaluation
/// Ratings are predicted for output 1 of the split and passed to TEST_PREDICTION
/// with an aggregation window of 24 hours.
predicted_test_data = PREDICT_RATING(JOIN(models, 1:splitted_rating_data))
model_errors = TEST_PREDICTION({aggregation_window_size = [24, 'hours']}, predicted_test_data)

Space shortcuts

Page tree

Versions Compared

Old Version 6

New Version Current

Key

Example how to use the operators