from mrjob.job import MRJob
from mrjob.job import MRStep


class RowColumn(MRJob):
    """Group whitespace-separated matrix entries by their second field.

    Each data line is expected to hold at least three whitespace-separated
    tokens. The second token is used as the grouping key (the column, going
    by the job's original documentation), and the first and third tokens are
    carried along as the value.
    """

    def Mapper(self, key, value):
        """Parse one input line and emit ``(second, (first, third))``.

        Args:
            key: Unused by this mapper.
            value: One line of input text.

        Yields:
            A ``(token[1], (token[0], token[2]))`` pair for each data line.
            Lines containing ``#`` anywhere and blank lines produce nothing.
        """
        # Skip comment lines (anything containing '#') and blank lines.
        if "#" in value or not value.strip():
            return

        fields = value.split()
        if len(fields) < 3:
            # A short line would raise IndexError below and abort the task;
            # count it instead so the problem stays visible in the job output.
            self.increment_counter("RowColumn", "malformed_lines", 1)
            return

        yield fields[1], (fields[0], fields[2])

    def Reducer(self, key, value):
        """Collect every value emitted for ``key`` into a single record.

        Args:
            key: The grouping key produced by ``Mapper``.
            value: Iterator over all values emitted for ``key``.

        Yields:
            One ``(key, values)`` pair, where ``values`` is a list.
        """
        # The incoming values are a one-shot iterator; materialize it so the
        # output protocol receives a concrete, serializable list.
        yield key, list(value)

    def steps(self):
        """Return the single map/reduce step that makes up this job."""
        return [MRStep(mapper=self.Mapper, reducer=self.Reducer)]


if __name__ == '__main__':
    RowColumn.run()