Task Answers (Python Coding Warm-Up)
Contents
Task Answers (Python Coding Warm-Up)
(Last updated: Jan 26, 2026)
Tasks
import pandas as pd
import numpy as np
def check_answer_df(df_result, df_answer, n=1):
    """
    This function checks if two output dataframes are the same.

    Parameters
    ----------
    df_result : pandas.DataFrame
        The output to check.
    df_answer : pandas.DataFrame
        The expected output.
    n : int
        The test case number used in the printed messages.

    Prints a pass message when df_answer.equals(df_result) is true.
    Otherwise prints a failure message followed by both dataframes.
    Nothing is returned.
    """
    # Compare explicitly instead of using assert: assert statements are
    # removed when Python runs with -O, which would make every test case pass.
    try:
        is_same = bool(df_answer.equals(df_result))
    except Exception:
        # A comparison that cannot be carried out counts as a failed test case.
        is_same = False
    if is_same:
        print("Test case %d passed." % n)
        return
    print("Test case %d failed." % n)
    print("")
    print("Your output is:")
    print(df_result)
    print("")
    print("Expected output is:")
    print(df_answer)
def answer_resample_df(df):
    """
    This function is the answer for task 1.

    The index of the input is read as epoch timestamps in seconds and
    converted to UTC datetimes. The data is then averaged per 60-minute
    bin, and each bin is labeled with its right edge.
    The input object is not modified.
    """
    # Work on a deep copy so the caller's data stays untouched.
    hourly_input = df.copy(deep=True)
    # Epoch seconds -> timezone-aware (UTC) datetime index.
    hourly_input.index = pd.to_datetime(hourly_input.index, unit="s", utc=True)
    # Each value should summarize the data from the past hour,
    # so every bin is labeled with its right edge.
    bins = hourly_input.resample("60Min", label="right")
    return bins.mean()
def answer_merge_df(df1, df2):
    """
    This function is the answer for task 2.

    Both inputs are outer-merged on their index, using the index name of
    df1 as the key, so that rows from both inputs are preserved.
    The result is indexed by that key again. The inputs are not modified.
    """
    # Work on deep copies so the caller's data stays untouched.
    left = df1.copy(deep=True)
    right = df2.copy(deep=True)
    # Both sides must expose the key under the same index name.
    key = left.index.name
    right.index.name = key
    # An outer merge keeps the rows from both sides; no gap filling is applied.
    merged = pd.merge_ordered(left, right, on=key, how="outer", fill_method=None)
    # The key comes back as a regular column, so move it to the index.
    return merged.set_index(key)
def answer_aggregate_df(df):
    """
    This function is the answer for task 3.

    It keeps the rows whose "v1" is positive and whose "group" is not
    "15227", resamples "v1" of every remaining group with
    answer_resample_df, merges the per-group results with answer_merge_df
    (one column per group, named after the group value), and fills the
    missing data with -1.

    Parameters
    ----------
    df : pandas.DataFrame
        Input with the columns "v1" and "group". The index is handed to
        answer_resample_df, which reads it as epoch seconds.

    Returns
    -------
    pandas.DataFrame or pandas.Series
        The merged result. When only one group remains, no merge happens
        and the resampled Series of that group is returned.

    Raises
    ------
    IndexError
        If no rows are left after filtering.
    """
    # Copy to avoid modifying the original dataframe.
    df = df.copy(deep=True)
    # Filter the data
    df = df[(df["v1"] > 0) & (df["group"] != "15227")]
    # Aggregate data for each group
    all_groups = []
    for g, df_g in df.groupby("group"):
        # Select only the variable v1 and resample it using the answer for task 1.
        df_g = answer_resample_df(df_g["v1"])
        # Name the result after the group value, which becomes the column name.
        df_g.name = g
        all_groups.append(df_g)
    # Merge all groups using the answer for task 2.
    # Iterating avoids the repeated list.pop(0) calls, each of which is O(n).
    df = all_groups[0]
    for df_g in all_groups[1:]:
        df = answer_merge_df(df, df_g)
    # Fill in the missing data with value -1
    df = df.fillna(-1)
    return df
def answer_transform_df(df):
    """
    This function is the answer for task 4.

    The wind direction in degrees ("wind_deg") is replaced by its sine and
    cosine components, and the wind speed ("wind_mph") is replaced by a
    flag that tells if the wind is calm (speed below 5). A missing wind
    speed gives a missing flag. The input dataframe is not modified.
    """
    # Work on a deep copy so the caller's data stays untouched.
    out = df.copy(deep=True)
    # Convert the direction to radians once and reuse it for both components.
    wind_rad = np.deg2rad(out["wind_deg"])
    out["wind_deg_sine"] = np.sin(wind_rad)
    out["wind_deg_cosine"] = np.cos(wind_rad)
    # Keep missing wind speed as missing instead of turning it into False.
    out["is_calm_wind"] = out["wind_mph"].apply(
        lambda mph: None if pd.isna(mph) else mph < 5
    )
    # The original columns are no longer needed.
    return out.drop(columns=["wind_deg", "wind_mph"])
def answer_transform_text_df(df):
    """
    This function is the answer for task 5.

    The "venue" column is replaced by three flags ("CV", "ML", "MM") that
    tell if the venue text matches the pattern of that category, and by a
    "year" column holding the first 4-digit number found in the venue text.
    The input dataframe is not modified.
    """
    # Work on a deep copy so the caller's data stays untouched.
    out = df.copy(deep=True)
    venue = out["venue"]
    # One regular expression per category flag, added in this column order.
    category_patterns = {
        "CV": "BMVC|WACV|ICCV|CVPR",
        "ML": "NeurIPS|ICLR",
        "MM": "MM",
    }
    for column, pattern in category_patterns.items():
        out[column] = venue.str.contains(pattern)
    # The year is kept as the matched text.
    out["year"] = venue.str.extract(r'([0-9]{4})')
    # The venue column is no longer needed.
    return out.drop(columns=["venue"])