Use of AI
This commit is contained in:
112
Use of AI.txt
Normal file
112
Use of AI.txt
Normal file
@@ -0,0 +1,112 @@
|
||||
|
||||
[1]. Tool: Google AI Mode
|
||||
Prompt: can I call .split() or an equivalent on a series of strings
|
||||
Output:
|
||||
|
||||
import pandas as pd
|
||||
|
||||
data = {'address': ['123 Main St, Anytown, USA', '456 Oak Ave, Somewhere, CA', '789 Pine Rd, Nowhere, NY']}
|
||||
df = pd.DataFrame(data)
|
||||
|
||||
# Split the 'address' column and expand the result into three new columns
|
||||
df[['street', 'city', 'state']] = df['address'].str.split(', ', expand=True)
|
||||
|
||||
print(df)
|
||||
|
||||
[2]. Tool: Google Gemini
|
||||
Prompt: How do I efficiently convert a Series of strings to one of integers based on first letter in each string
|
||||
Output:
|
||||
import pandas as pd
|
||||
|
||||
s = pd.Series(['Cherry', 'Banana', 'Apple', 'Blueberry', 'Avocado'])
|
||||
|
||||
# Convert the first letters to a category type and get the codes
|
||||
codes = s.str[0].astype('category').cat.codes
|
||||
|
||||
print(codes)
|
||||
|
||||
[3]. Tool: Google Gemini
|
||||
Prompt (In conversation with [2]): I want to specify my own codes
|
||||
Output:
|
||||
custom_map = {
|
||||
'A': 100,
|
||||
'B': 250,
|
||||
'C': 310
|
||||
}
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np # Used for np.nan
|
||||
|
||||
s = pd.Series(['Apple', 'Banana', 'Cherry', 'Avocado', 'Durian'])
|
||||
|
||||
# Apply the custom map to the first letter of each string
|
||||
s_encoded = s.str[0].map(custom_map)
|
||||
|
||||
print(s_encoded)
|
||||
|
||||
[4]. Tool: Google Gemini
|
||||
Prompt: dtstring = data['DateTime'].str.extract(r'(\d+)')
|
||||
data['year'] = dtstring[0].astype(int)
|
||||
data['month'] = dtstring[1].astype(int)
|
||||
data['date'] = dtstring[2].astype(int)
|
||||
data['minute'] = dtstring[3].astype(int) * 60 + dtstring[4].astype(int)
|
||||
|
||||
what's wrong with this?
|
||||
|
||||
Output:
|
||||
# Regex with multiple capture groups for year, month, day, hour, and minute
|
||||
pattern = r'(\d{4}).*?(\d{2}).*?(\d{2}).*?(\d{2}).*?(\d{2})'
|
||||
dtstring = data['DateTime'].str.extract(pattern)
|
||||
|
||||
# Now dtstring has 5 columns (0 to 4)
|
||||
data['year'] = dtstring[0].astype(int)
|
||||
data['month'] = dtstring[1].astype(int)
|
||||
data['date'] = dtstring[2].astype(int)
|
||||
# Note: The calculation seems to be for the minute of the day
|
||||
data['minute_of_day'] = dtstring[3].astype(int) * 60 + dtstring[4].astype(int)
|
||||
|
||||
[5]. Tool: Google Gemini
|
||||
Prompt: can I just collect all groups of digits?
|
||||
Output:
|
||||
import pandas as pd
|
||||
|
||||
# Sample data
|
||||
data = pd.DataFrame({'DateTime': ['2025-10-06 16:45:30', 'Date:2024/09/01 Time:08-30-00']})
|
||||
|
||||
# 1. Find all groups of digits and get a Series of lists
|
||||
digit_lists = data['DateTime'].str.findall(r'(\d+)')
|
||||
# This returns:
|
||||
# 0 [2025, 10, 06, 16, 45, 30]
|
||||
# 1 [2024, 09, 01, 08, 30, 00]
|
||||
# Name: DateTime, dtype: object
|
||||
|
||||
# 2. Expand the lists into a DataFrame
|
||||
dt_parts = pd.DataFrame(digit_lists.tolist(), index=data.index)
|
||||
|
||||
# 3. Assign columns, converting to integer type
|
||||
data['year'] = dt_parts[0].astype(int)
|
||||
data['month'] = dt_parts[1].astype(int)
|
||||
data['date'] = dt_parts[2].astype(int)
|
||||
data['hour'] = dt_parts[3].astype(int)
|
||||
data['minute'] = dt_parts[4].astype(int)
|
||||
|
||||
[6]. Tool: Google Gemini
|
||||
Prompt: x = data[['AgeInDays':'HasName'], ['Type_Cat':'Sex_Unknown']] can I do this?
|
||||
Output:
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
# Sample DataFrame with non-contiguous column names
|
||||
data = pd.DataFrame(np.random.rand(5, 10), columns=[
|
||||
'AgeInDays', 'AgeInMonths', 'HasName',
|
||||
'Type_Cat', 'Type_Dog', 'Type_Bird',
|
||||
'Sex_Male', 'Sex_Female', 'Sex_Unknown',
|
||||
'Other_Col'
|
||||
])
|
||||
|
||||
# Create a combined list of columns by extracting columns from the .loc slices
|
||||
cols_to_select = data.loc[:, 'AgeInDays':'HasName'].columns.tolist() + \
|
||||
data.loc[:, 'Type_Cat':'Sex_Unknown'].columns.tolist()
|
||||
|
||||
# Select the combined columns using the single list
|
||||
x = data[cols_to_select].copy()
|
||||
Reference in New Issue
Block a user