Source code for partnermatcher.main

from networkx import max_weight_matching, nx
import pandas as pd

from common.rpc.auth import write_spreadsheet

# Column headers of the student response data. Each constant is used as a key
# into a response row (a ``pandas.Series``) by the functions in this module.

# Student's email address; written to the output sheet to identify a student.
EMAIL_COL = "Email Address"
# Timezone answer; equal answers add 20 to the pair weight in ``get_weight()``.
TIMEZONE_COL = "What is 8am PT (Berkeley Time) in your time?"
# Self-assessed skill; equal answers add 10 to the pair weight.
SKILL_COL = "How skillful a programmer do you consider yourself to be at this time?"
# Letter-grade answer; equal answers add 5 to the pair weight.
PNP_COL = "Are you taking CS 61A for a letter grade?"
# Comma-separated interest words; parsed by ``get_words()``.
WORDS_COL = "What are three words that describe your interests?"


def get_words(row):
    """Splits the ``WORDS_COL`` answer of a student response row into
    normalized words.

    The answer is split on commas, the raw pieces are sorted (before any
    stripping or lower-casing, so the order follows the raw text), three
    empty-string padding entries are appended, and finally every entry is
    stripped of surrounding whitespace and lower-cased. A missing or empty
    answer therefore yields a list of four empty strings.

    :param row: One-dimensional array containing a student's responses to the
        partner matching questions.
    :type row: ~pandas.Series

    :return: a list of stripped, lower case words from the ``WORDS_COL`` column
        of the row, followed by three ``""`` padding entries.
    """
    # A falsy answer (e.g. ``None`` or ``""``) is treated as an empty string.
    answer = row[WORDS_COL] or ""

    pieces = answer.split(",")
    pieces.sort()
    pieces.extend([""] * 3)

    return [piece.strip().lower() for piece in pieces]


def get_weight(row1, row2):
    """Calculates and returns the partner matching weight between two students
    based on their responses. The higher the weight, the more the partner
    matching algorithm will favor matching these two students together.

    The weight is the sum of:

    * 20 if both students gave the same ``TIMEZONE_COL`` answer,
    * 10 if both students gave the same ``SKILL_COL`` answer,
    * 5 if both students gave the same ``PNP_COL`` answer,
    * 1 for every distinct, non-empty word (see ``get_words()``) that the two
      students have in common.

    The result is symmetric: ``get_weight(a, b) == get_weight(b, a)``.

    :param row1: One-dimensional array containing the first student's responses
        to the partner matching questions.
    :type row1: ~pandas.Series
    :param row2: One-dimensional array containing the second student's responses
        to the partner matching questions.
    :type row2: ~pandas.Series

    :return: an int representing the partner matching weight between the student
        whose responses are in row1 and the student whose responses are in
        row2.
    """
    score = 0
    if row1[TIMEZONE_COL] == row2[TIMEZONE_COL]:
        score += 20
    if row1[SKILL_COL] == row2[SKILL_COL]:
        score += 10
    if row1[PNP_COL] == row2[PNP_COL]:
        score += 5

    # Compare *sets* of words so that a word repeated by one student is only
    # counted once and the weight does not depend on argument order. The empty
    # string is discarded because ``get_words()`` pads its result with "".
    shared_words = (set(get_words(row1)) & set(get_words(row2))) - {""}
    score += len(shared_words)

    return score


def match():
    """Performs the partner matching algorithm. The algorithm calculates the
    weight between each pair of students using ``get_weight()``, forms a graph
    where the nodes are the students and the edge weights are the weights
    between each pair of students, and then calculates the maximum-weighted
    matching of the graph, writing the results out to a Google sheet.

    Student responses are read from ``data.csv`` (relative to the current
    working directory); every column is read as a string and missing values
    are replaced by ``""``. One output row is written per matched pair,
    preceded by a header row.
    """
    data = pd.read_csv("data.csv", dtype=str).fillna("")
    g = nx.Graph()

    # Materialize the rows once; re-running ``iterrows()`` inside the pair
    # loop would rebuild every row Series for every student.
    students = list(data.iterrows())
    g.add_nodes_from(label for label, _ in students)

    # Visit each unordered pair of students exactly once.
    for position, (i, row1) in enumerate(students):
        for j, row2 in students[position + 1 :]:
            g.add_edge(i, j, weight=get_weight(row1, row2))

    # NOTE(review): with the default ``maxcardinality=False`` a pair whose
    # weight is 0 is not guaranteed to be matched, so some students may be
    # left without a partner -- confirm whether that is intended.
    matching = max_weight_matching(g)

    sheet_rows = [
        ["Student 1", "Student 2", "Timezone 1", "Timezone 2", "Words 1", "Words 2"]
    ]

    for i, j in matching:
        # ``DataFrame.ix`` was removed in pandas 1.0; the graph nodes are index
        # labels, so label-based ``.loc`` is the matching lookup.
        first = data.loc[i]
        second = data.loc[j]
        sheet_rows.append(
            [
                first[EMAIL_COL],
                second[EMAIL_COL],
                first[TIMEZONE_COL],
                second[TIMEZONE_COL],
                first[WORDS_COL],
                second[WORDS_COL],
            ]
        )

    write_spreadsheet(
        url="https://docs.google.com/spreadsheets/d/1vpx-28ox2CNsyzbwrHLB9nCfiOZXksXp-jCgj21fNuw/",
        sheet_name="Sheet1",
        content=sheet_rows,
    )


# Run the partner matching when this file is executed as a script.
if __name__ == "__main__":
    match()