diff --git a/Use of AI.txt b/Use of AI.txt new file mode 100644 index 0000000..6ab2b02 --- /dev/null +++ b/Use of AI.txt @@ -0,0 +1,112 @@ + +[1]. Tool: Google AI Mode + Prompt: can I call .split() or an equivalent on a series of strings + Output: + + import pandas as pd + + data = {'address': ['123 Main St, Anytown, USA', '456 Oak Ave, Somewhere, CA', '789 Pine Rd, Nowhere, NY']} + df = pd.DataFrame(data) + + # Split the 'address' column and expand the result into three new columns + df[['street', 'city', 'state']] = df['address'].str.split(', ', expand=True) + + print(df) + +[2]. Tool: Google Gemini + Prompt: How do I efficiently convert a Series of strings to one of integers based on first letter in each string + Output: + import pandas as pd + + s = pd.Series(['Cherry', 'Banana', 'Apple', 'Blueberry', 'Avocado']) + + # Convert the first letters to a category type and get the codes + codes = s.str[0].astype('category').cat.codes + + print(codes) + +[3]. Tool: Google Gemini + Prompt (In conversation with [2]): I want to specify my own codes + Output: + custom_map = { + 'A': 100, + 'B': 250, + 'C': 310 + } + + import pandas as pd + import numpy as np # Used for np.nan + + s = pd.Series(['Apple', 'Banana', 'Cherry', 'Avocado', 'Durian']) + + # Apply the custom map to the first letter of each string + s_encoded = s.str[0].map(custom_map) + + print(s_encoded) + +[4]. Tool: Google Gemini + Prompt: dtstring = data['DateTime'].str.extract(r'(\d+)') + data['year'] = dtstring[0].astype(int) + data['month'] = dtstring[1].astype(int) + data['date'] = dtstring[2].astype(int) + data['minute'] = dtstring[3].astype(int) * 60 + dtstring[4].astype(int) + + what's wrong with this? + + Output: + # Regex with multiple capture groups for year, month, day, hour, and minute + pattern = r'(\d{4}).*?(\d{2}).*?(\d{2}).*?(\d{2}).*?(\d{2})' + dtstring = data['DateTime'].str.extract(pattern) + + # Now dtstring has 5 columns (0 to 4) + data['year'] = dtstring[0].astype(int) + data['month'] = dtstring[1].astype(int) + data['date'] = dtstring[2].astype(int) + # Note: The calculation seems to be for the minute of the day + data['minute_of_day'] = dtstring[3].astype(int) * 60 + dtstring[4].astype(int) + +[5]. Tool: Google Gemini + Prompt: can I just collect all groups of digits? + Output: + import pandas as pd + + # Sample data + data = pd.DataFrame({'DateTime': ['2025-10-06 16:45:30', 'Date:2024/09/01 Time:08-30-00']}) + + # 1. Find all groups of digits and get a Series of lists + digit_lists = data['DateTime'].str.findall(r'(\d+)') + # This returns: + # 0 [2025, 10, 06, 16, 45, 30] + # 1 [2024, 09, 01, 08, 30, 00] + # Name: DateTime, dtype: object + + # 2. Expand the lists into a DataFrame + dt_parts = pd.DataFrame(digit_lists.tolist(), index=data.index) + + # 3. Assign columns, converting to integer type + data['year'] = dt_parts[0].astype(int) + data['month'] = dt_parts[1].astype(int) + data['date'] = dt_parts[2].astype(int) + data['hour'] = dt_parts[3].astype(int) + data['minute'] = dt_parts[4].astype(int) + +[6]. Tool: Google Gemini + Prompt: x = data[['AgeInDays':'HasName'], ['Type_Cat':'Sex_Unknown']] can I do this? + Output: + import pandas as pd + import numpy as np + + # Sample DataFrame with non-contiguous column names + data = pd.DataFrame(np.random.rand(5, 10), columns=[ + 'AgeInDays', 'AgeInMonths', 'HasName', + 'Type_Cat', 'Type_Dog', 'Type_Bird', + 'Sex_Male', 'Sex_Female', 'Sex_Unknown', + 'Other_Col' + ]) + + # Create a combined list of columns by extracting columns from the .loc slices + cols_to_select = data.loc[:, 'AgeInDays':'HasName'].columns.tolist() + \ + data.loc[:, 'Type_Cat':'Sex_Unknown'].columns.tolist() + + # Select the combined columns using the single list + x = data[cols_to_select].copy() \ No newline at end of file