Length of a Post/Answer Map Reducer Code

Correlation between the length of a post and the length of answers.
Output the length of the post and the average answer length for each post.

Mapper result Reducer result
111 35 111
15084 237 15084
2 145 2
3778 164 3848
3778 69 3778
66193 60 66193
66193 34 66199
66193 302 66196
66193 288 66195
7185 86 7185
10000001 140 323.940451745
10000002 625 465.578947368
10000005 0 35.0
10000006 836 99.6666666667
10000007 4224 580.428571429
66193 59 154.666666667

#!/usr/bin/python
import sys
import csv

def mapper():
    """Write one ``parent_id<TAB>body_length<TAB>id`` record per input row.

    Reads tab-separated rows from standard input and treats the first row
    as a header (it is discarded). For every following row, column 0 is
    used as the node id, the character length of column 4 as the body
    length, and column 6 as the parent id. When the parent id is the
    NULL marker (a backslash followed by ``N``), the node's own id is used
    as the key instead, so a parentless post is keyed by itself.

    Rows with fewer than seven columns are skipped, and empty input
    produces no output.
    """
    reader = csv.reader(sys.stdin, delimiter='\t')
    # next() with a default works on Python 2 and 3 and does not raise
    # StopIteration when stdin is empty.
    if next(reader, None) is None:
        return
    for row in reader:
        # Columns 0, 4 and 6 are required; a shorter row cannot be mapped.
        if len(row) < 7:
            continue
        node_id = row[0]
        body_length = len(row[4])
        parent_id = row[6]
        # "\\N" is the two-character NULL marker (backslash + N).
        if parent_id == "\\N":
            parent_id = node_id
        # Write exact tab-separated fields; a comma-separated print would
        # pad every field with spaces around the delimiter.
        sys.stdout.write("%s\t%d\t%s\n" % (parent_id, body_length, node_id))
        
def _format_post_stats(parent_id, question_length, answer_total, answer_count):
    """Return one output line: post id, question length, mean answer length."""
    # The average stays 0 when the post has no answers (avoids dividing by 0).
    average = float(answer_total) / answer_count if answer_count > 0 else 0
    return "%s\t%s\t%s\n" % (parent_id, question_length, average)


def reducer():
    """Write ``parent_id<TAB>question_length<TAB>average_answer_length``.

    Reads ``parent_id<TAB>body_length<TAB>id`` records from standard input.
    A record whose id equals its parent id supplies the question length;
    every other record in the group contributes its body length to the
    average. Records for the same parent id must be adjacent in the input
    (for example, sorted by key), because a group is closed as soon as the
    parent id changes.

    Lines that do not have exactly three fields (including blank lines)
    are skipped. Surrounding whitespace in each field is ignored, so input
    with space-padded fields is accepted as well. Non-numeric ids or body
    lengths raise ``ValueError``.
    """
    reader = csv.reader(sys.stdin, delimiter='\t')
    current_parent = None
    question_length = 0
    answer_total = 0
    answer_count = 0

    for row in reader:
        # Validate the shape BEFORE unpacking, otherwise a malformed line
        # raises ValueError instead of being skipped.
        if len(row) != 3:
            continue
        parent_id, body_length, node_id = [field.strip() for field in row]

        if current_parent is not None and current_parent != parent_id:
            # The key changed: flush the finished group and start afresh.
            sys.stdout.write(_format_post_stats(
                current_parent, question_length, answer_total, answer_count))
            question_length = 0
            answer_total = 0
            answer_count = 0

        current_parent = parent_id
        if int(parent_id) == int(node_id):
            # The record keyed by its own id is the question itself.
            question_length = body_length
        else:
            answer_total += int(body_length)
            answer_count += 1

    # Flush the last group, if any input was seen at all.
    if current_parent is not None:
        sys.stdout.write(_format_post_stats(
            current_parent, question_length, answer_total, answer_count))
                 
Advertisement

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: