from networkx import max_weight_matching, nx
import pandas as pd
from common.rpc.auth import write_spreadsheet
# Column names looked up in each student's response row (the rows are read
# from ``data.csv`` by ``match()``).

# Student identifier written to the "Student 1"/"Student 2" output columns.
EMAIL_COL = "Email Address"
# Identical answers add 20 to a pair's weight in ``get_weight()``.
TIMEZONE_COL = "What is 8am PT (Berkeley Time) in your time?"
# Identical answers add 10 to a pair's weight in ``get_weight()``.
SKILL_COL = "How skillful a programmer do you consider yourself to be at this time?"
# Identical answers add 5 to a pair's weight in ``get_weight()``.
PNP_COL = "Are you taking CS 61A for a letter grade?"
# Comma-separated free text parsed by ``get_words()``; shared words add 1 each.
WORDS_COL = "What are three words that describe your interests?"
def get_words(row):
    """Split a student's ``WORDS_COL`` answer into normalized words.

    The answer is split on commas, the raw pieces are sorted, three empty
    strings are appended, and every entry is then stripped of surrounding
    whitespace and lower-cased. Because sorting happens before stripping, the
    order reflects the raw (unstripped, original-case) text. The returned list
    therefore always ends with three empty strings, and callers are expected
    to skip empty entries.

    :param row: One-dimensional array containing a student's responses to the
        partner matching questions.
    :type row: ~pandas.Series

    :return: the stripped, lower case words from the ``WORDS_COL`` column of
        the row, followed by three empty strings.
    """
    # A falsy answer (e.g. an empty cell) is treated as the empty string.
    answer = row[WORDS_COL] or ""
    pieces = answer.split(",")
    pieces.sort()
    pieces.extend(("", "", ""))
    return [piece.strip().lower() for piece in pieces]
def get_weight(row1, row2):
    """Compute the partner matching weight between two students.

    The weight is the sum of:

    * 20 if both students gave the same ``TIMEZONE_COL`` answer,
    * 10 if both gave the same ``SKILL_COL`` answer,
    * 5 if both gave the same ``PNP_COL`` answer,
    * 1 for every non-empty word of ``row1`` (as returned by ``get_words()``)
      that also occurs among the words of ``row2``; a word repeated in
      ``row1`` is counted each time it appears.

    The higher the weight, the more the partner matching algorithm will favor
    matching these two students together.

    :param row1: One-dimensional array containing the first student's responses
        to the partner matching questions.
    :type row1: ~pandas.Series
    :param row2: One-dimensional array containing the second student's responses
        to the partner matching questions.
    :type row2: ~pandas.Series

    :return: an int representing the partner matching weight between the
        student whose responses are in row1 and the student whose responses
        are in row2.
    """
    # (column, points awarded when both students gave the same answer)
    column_points = (
        (TIMEZONE_COL, 20),
        (SKILL_COL, 10),
        (PNP_COL, 5),
    )
    answer_points = sum(
        points for column, points in column_points if row1[column] == row2[column]
    )

    own_words = get_words(row1)
    partner_words = get_words(row2)
    # Empty entries (padding / blank answers) never count as a shared interest.
    shared = [word for word in own_words if word and word in partner_words]

    return answer_points + len(shared)
def match():
    """Performs the partner matching algorithm.

    Reads the student responses from ``data.csv`` (every column as a string,
    with missing cells replaced by ``""``), builds a graph whose nodes are the
    row labels of the students and whose edge weights come from
    ``get_weight()`` for every pair of students, computes a maximum-weighted
    matching of that graph, and writes one row per matched pair to a Google
    sheet via ``write_spreadsheet()``.

    Only the pairs contained in the matching are written; a student that the
    matching leaves without a partner does not appear in the output.

    :return: ``None``; the result is written to the spreadsheet.
    """
    data = pd.read_csv("data.csv", dtype=str).fillna("")

    # Materialize the rows once: ``iterrows()`` builds a new Series per row on
    # every pass, so re-running it inside the pairwise loop is wasted work.
    rows = list(data.iterrows())

    g = nx.Graph()
    # Register every student first so each one is a node in the graph even
    # when there is no other student to connect it to.
    for i, _ in rows:
        g.add_node(i)
    # One edge per unordered pair of distinct students (i < j).
    for i, row1 in rows:
        for j, row2 in rows:
            if i < j:
                g.add_edge(i, j, weight=get_weight(row1, row2))

    matching = max_weight_matching(g)

    sheet_rows = [
        ["Student 1", "Student 2", "Timezone 1", "Timezone 2", "Words 1", "Words 2"]
    ]
    for i, j in matching:
        # ``DataFrame.ix`` was removed in pandas 1.0; the nodes are the frame's
        # row labels, so label-based ``.loc`` is the correct lookup.
        first = data.loc[i]
        second = data.loc[j]
        sheet_rows.append(
            [
                first[EMAIL_COL],
                second[EMAIL_COL],
                first[TIMEZONE_COL],
                second[TIMEZONE_COL],
                first[WORDS_COL],
                second[WORDS_COL],
            ]
        )

    write_spreadsheet(
        url="https://docs.google.com/spreadsheets/d/1vpx-28ox2CNsyzbwrHLB9nCfiOZXksXp-jCgj21fNuw/",
        sheet_name="Sheet1",
        content=sheet_rows,
    )
# Run the matching only when this file is executed as a script, not on import.
if __name__ == "__main__":
    match()