Use of AI

This commit is contained in:
Aditya Pulipaka
2025-10-09 11:21:11 -05:00
parent 76981fb68c
commit c7c23cbd45

112
Use of AI.txt Normal file
View File

@@ -0,0 +1,112 @@
[1]. Tool: Google AI Mode
Prompt: can I call .split() or an equivalent on a series of strings
Output:
import pandas as pd
data = {'address': ['123 Main St, Anytown, USA', '456 Oak Ave, Somewhere, CA', '789 Pine Rd, Nowhere, NY']}
df = pd.DataFrame(data)
# Split the 'address' column and expand the result into three new columns
df[['street', 'city', 'state']] = df['address'].str.split(', ', expand=True)
print(df)
[2]. Tool: Google Gemini
Prompt: How do I efficiently convert a Series of strings to one of integers based on first letter in each string
Output:
import pandas as pd
s = pd.Series(['Cherry', 'Banana', 'Apple', 'Blueberry', 'Avocado'])
# Convert the first letters to a category type and get the codes
codes = s.str[0].astype('category').cat.codes
print(codes)
[3]. Tool: Google Gemini
Prompt (in conversation with [2]): I want to specify my own codes
Output:
custom_map = {
'A': 100,
'B': 250,
'C': 310
}
import pandas as pd
import numpy as np # Used for np.nan
s = pd.Series(['Apple', 'Banana', 'Cherry', 'Avocado', 'Durian'])
# Apply the custom map to the first letter of each string
s_encoded = s.str[0].map(custom_map)
print(s_encoded)
[4]. Tool: Google Gemini
Prompt: dtstring = data['DateTime'].str.extract(r'(\d+)')
data['year'] = dtstring[0].astype(int)
data['month'] = dtstring[1].astype(int)
data['date'] = dtstring[2].astype(int)
data['minute'] = dtstring[3].astype(int) * 60 + dtstring[4].astype(int)
what's wrong with this?
Output:
# Regex with multiple capture groups for year, month, day, hour, and minute
pattern = r'(\d{4}).*?(\d{2}).*?(\d{2}).*?(\d{2}).*?(\d{2})'
dtstring = data['DateTime'].str.extract(pattern)
# Now dtstring has 5 columns (0 to 4)
data['year'] = dtstring[0].astype(int)
data['month'] = dtstring[1].astype(int)
data['date'] = dtstring[2].astype(int)
# Note: The calculation seems to be for the minute of the day
data['minute_of_day'] = dtstring[3].astype(int) * 60 + dtstring[4].astype(int)
[5]. Tool: Google Gemini
Prompt: can I just collect all groups of digits?
Output:
import pandas as pd
# Sample data
data = pd.DataFrame({'DateTime': ['2025-10-06 16:45:30', 'Date:2024/09/01 Time:08-30-00']})
# 1. Find all groups of digits and get a Series of lists
digit_lists = data['DateTime'].str.findall(r'(\d+)')
# This returns:
# 0 [2025, 10, 06, 16, 45, 30]
# 1 [2024, 09, 01, 08, 30, 00]
# Name: DateTime, dtype: object
# 2. Expand the lists into a DataFrame
dt_parts = pd.DataFrame(digit_lists.tolist(), index=data.index)
# 3. Assign columns, converting to integer type
data['year'] = dt_parts[0].astype(int)
data['month'] = dt_parts[1].astype(int)
data['date'] = dt_parts[2].astype(int)
data['hour'] = dt_parts[3].astype(int)
data['minute'] = dt_parts[4].astype(int)
[6]. Tool: Google Gemini
Prompt: x = data[['AgeInDays':'HasName'], ['Type_Cat':'Sex_Unknown']] can I do this?
Output:
import pandas as pd
import numpy as np
# Sample DataFrame with non-contiguous column names
data = pd.DataFrame(np.random.rand(5, 10), columns=[
'AgeInDays', 'AgeInMonths', 'HasName',
'Type_Cat', 'Type_Dog', 'Type_Bird',
'Sex_Male', 'Sex_Female', 'Sex_Unknown',
'Other_Col'
])
# Create a combined list of columns by extracting columns from the .loc slices
cols_to_select = data.loc[:, 'AgeInDays':'HasName'].columns.tolist() + \
data.loc[:, 'Type_Cat':'Sex_Unknown'].columns.tolist()
# Select the combined columns using the single list
x = data[cols_to_select].copy()