use of ai

2025-10-09 11:21:11 -05:00
parent 76981fb68c
commit c7c23cbd45
1 changed files with 112 additions and 0 deletions
--- a/AI.txt
+++ b/AI.txt
@@ -0,0 +1,112 @@
+
+[1]. Tool: Google AI Mode
+      Prompt: can I call .split() or an equivalent on a series of strings
+      Output:
+      
+      import pandas as pd
+
+      data = {'address': ['123 Main St, Anytown, USA', '456 Oak Ave, Somewhere, CA', '789 Pine Rd, Nowhere, NY']}
+      df = pd.DataFrame(data)
+
+      # Split the 'address' column and expand the result into three new columns
+      df[['street', 'city', 'state']] = df['address'].str.split(', ', expand=True)
+
+      print(df)
+
+[2]. Tool: Google Gemini
+      Prompt: How do I efficiently convert a Series of strings to one of integers based on first letter in each string
+      Output:
+        import pandas as pd
+
+        s = pd.Series(['Cherry', 'Banana', 'Apple', 'Blueberry', 'Avocado'])
+
+        # Convert the first letters to a category type and get the codes
+        codes = s.str[0].astype('category').cat.codes
+
+        print(codes)
+
+[3]. Tool: Google Gemini
+      Prompt (In conversation with [2]): I want to specify my own codes
+      Output:
+        custom_map = {
+            'A': 100,
+            'B': 250,
+            'C': 310
+        }
+        
+        import pandas as pd
+        import numpy as np # Used for np.nan
+
+        s = pd.Series(['Apple', 'Banana', 'Cherry', 'Avocado', 'Durian'])
+
+        # Apply the custom map to the first letter of each string (letters not in the map, e.g. 'D', become NaN)
+        s_encoded = s.str[0].map(custom_map)
+
+        print(s_encoded)
+
+[4]. Tool: Google Gemini
+      Prompt: dtstring = data['DateTime'].str.extract(r'(\d+)')
+              data['year'] = dtstring[0].astype(int)
+              data['month'] = dtstring[1].astype(int)
+              data['date'] = dtstring[2].astype(int)
+              data['minute'] = dtstring[3].astype(int) * 60 + dtstring[4].astype(int)
+
+              what's wrong with this?
+      
+      Output:
+        # Regex with multiple capture groups for year, month, day, hour, and minute
+        pattern = r'(\d{4}).*?(\d{2}).*?(\d{2}).*?(\d{2}).*?(\d{2})'
+        dtstring = data['DateTime'].str.extract(pattern)
+
+        # Now dtstring has 5 columns (0 to 4)
+        data['year'] = dtstring[0].astype(int)
+        data['month'] = dtstring[1].astype(int)
+        data['date'] = dtstring[2].astype(int)
+        # Note: The calculation seems to be for the minute of the day
+        data['minute_of_day'] = dtstring[3].astype(int) * 60 + dtstring[4].astype(int)
+
+[5]. Tool: Google Gemini
+      Prompt: can I just collect all groups of digits?
+      Output:
+        import pandas as pd
+
+        # Sample data
+        data = pd.DataFrame({'DateTime': ['2025-10-06 16:45:30', 'Date:2024/09/01 Time:08-30-00']})
+
+        # 1. Find all groups of digits and get a Series of lists
+        digit_lists = data['DateTime'].str.findall(r'(\d+)')
+        # This returns:
+        # 0    [2025, 10, 06, 16, 45, 30]
+        # 1    [2024, 09, 01, 08, 30, 00]
+        # Name: DateTime, dtype: object
+
+        # 2. Expand the lists into a DataFrame
+        dt_parts = pd.DataFrame(digit_lists.tolist(), index=data.index)
+
+        # 3. Assign columns, converting to integer type
+        data['year'] = dt_parts[0].astype(int)
+        data['month'] = dt_parts[1].astype(int)
+        data['date'] = dt_parts[2].astype(int)
+        data['hour'] = dt_parts[3].astype(int)
+        data['minute'] = dt_parts[4].astype(int)
+
+[6]. Tool: Google Gemini
+      Prompt: x = data[['AgeInDays':'HasName'], ['Type_Cat':'Sex_Unknown']] can I do this?
+      Output: 
+        import pandas as pd
+        import numpy as np
+
+        # Sample DataFrame containing the two label ranges of columns to select
+        data = pd.DataFrame(np.random.rand(5, 10), columns=[
+            'AgeInDays', 'AgeInMonths', 'HasName',
+            'Type_Cat', 'Type_Dog', 'Type_Bird',
+            'Sex_Male', 'Sex_Female', 'Sex_Unknown',
+            'Other_Col'
+        ])
+
+        # Create a combined list of columns by extracting columns from the .loc slices
+        cols_to_select = data.loc[:, 'AgeInDays':'HasName'].columns.tolist() + \
+                        data.loc[:, 'Type_Cat':'Sex_Unknown'].columns.tolist()
+
+        # Select the combined columns using the single list
+        x = data[cols_to_select].copy()