| 
									
										
										
										
											2025-01-03 00:31:20 +01:00
										 |  |  | #!.venv/bin/python | 
					
						
							|  |  |  | r"""This module extracts a part of a markdown string from an input file or a given input string.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The extraction starts at a line that contains the content specified by the `--start-line` parameter | 
					
						
							|  |  |  | and ends at a line that contains the content specified by the `--end-line` parameter. | 
					
						
							|  |  |  | If `--start-line` is not specified, extraction starts from the beginning of the file or string. | 
					
						
							|  |  |  | If `--end-line` is not specified, extraction goes to the end of the file or string. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The extracted markdown string is written either to stdout or to the specified output file. | 
					
						
							|  |  |  | Additionally, the heading levels can be adjusted by specifying the `--heading-level` parameter. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Usage: | 
					
						
							|  |  |  |     scripts/extract_markdown.py [--input-file INPUT_FILE | --input INPUT_STRING | --input-stdin] [--start-line START_LINE] [--end-line END_LINE] [--output-file OUTPUT_FILE] [--heading-level HEADING_LEVEL] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Arguments: | 
					
						
							|  |  |  |     --input-file   : The file path to read the markdown content from. | 
					
						
							|  |  |  |     --input        : The markdown content as a string. | 
							|  |  |  |     --input-stdin  : Read markdown content from stdin. | 
					
						
							|  |  |  |     --start-line   : Optional. The string content of the start line from where extraction begins. | 
					
						
							|  |  |  |     --end-line     : Optional. The string content of the end line where extraction ends. | 
					
						
							|  |  |  |     --output-file  : Optional. The file path to write the extracted markdown content to. | 
					
						
							|  |  |  |     --heading-level: Optional. The number of additional `#` to add to markdown headings or to remove | 
					
						
							|  |  |  |         from markdown headings if negative. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Example: | 
					
						
							|  |  |  |     scripts/extract_markdown.py --input-file input.md --start-line "# Start" --end-line "# End" --output-file output.md --heading-level 1 | 
					
						
							|  |  |  |     scripts/extract_markdown.py --input "# Start\n\nSome content here\n\n# End" --start-line "# Start" --end-line "# End" --output-file output.md --heading-level 1 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | This module extracts a part of a markdown string from an input file or a given input string. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The extraction starts at a line that contains the content specified by the `--start-line` parameter | 
					
						
							|  |  |  | and ends at a line that contains the content specified by the `--end-line` parameter. | 
					
						
							|  |  |  | If `--start-line` is not specified, extraction starts from the beginning of the file or string. | 
					
						
							|  |  |  | If `--end-line` is not specified, extraction goes to the end of the file or string. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The extracted markdown string is written either to stdout or to the specified output file. | 
					
						
							|  |  |  | Additionally, the heading levels can be adjusted by specifying the `--heading-level` parameter. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Usage: | 
					
						
							|  |  |  |     python extract_markdown.py [--input-file INPUT_FILE | --input INPUT_STRING | --input-stdin] [--start-line START_LINE] [--end-line END_LINE] [--output-file OUTPUT_FILE] [--heading-level HEADING_LEVEL] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Arguments: | 
					
						
							|  |  |  |     --input-file   : The file path to read the markdown content from. | 
					
						
							|  |  |  |     --input        : The markdown content as a string. | 
					
						
							|  |  |  |     --input-stdin  : Read markdown content from stdin. | 
					
						
							|  |  |  |     --start-line   : Optional. The string content of the start line from where extraction begins. | 
					
						
							|  |  |  |     --end-line     : Optional. The string content of the end line where extraction ends. | 
					
						
							|  |  |  |     --output-file  : Optional. The file path to write the extracted markdown content to. | 
					
						
							|  |  |  |     --heading-level: Optional. The number of additional `#` to add to markdown headings or to remove from markdown headings if negative. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Example: | 
					
						
							|  |  |  |     python extract_markdown.py --input-file input.md --start-line "# Start" --end-line "# End" --output-file output.md --heading-level 1 | 
					
						
							|  |  |  |     python extract_markdown.py --input "# Start\n\nSome content here\n\n# End" --start-line "# Start" --end-line "# End" --output-file output.md --heading-level 1 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import argparse | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def adjust_heading_levels(line: str, heading_level: int) -> str: | 
					
						
							|  |  |  |     """Adjust the heading levels in a markdown line.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Args: | 
					
						
							|  |  |  |         line (str): The markdown line. | 
					
						
							|  |  |  |         heading_level (int): The number of levels to adjust the headings by. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Returns: | 
					
						
							|  |  |  |         adjusted_line (str): The line with adjusted heading levels. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     heading_pattern = re.compile(r"^(#+)\s") | 
					
						
							|  |  |  |     match = heading_pattern.match(line) | 
					
						
							|  |  |  |     if match: | 
					
						
							|  |  |  |         current_level = len(match.group(1)) | 
					
						
							|  |  |  |         new_level = current_level + heading_level | 
					
						
							|  |  |  |         if new_level > 0: | 
					
						
							|  |  |  |             adjusted_line = "#" * new_level + line[current_level:] | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             adjusted_line = line[current_level:] | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         adjusted_line = line | 
					
						
							|  |  |  |     return adjusted_line | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def extract_markdown(content: str, start_line: str, end_line: str, heading_level: int) -> str: | 
					
						
							|  |  |  |     """Extract a part of a markdown string from given content.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Args: | 
					
						
							|  |  |  |         content (str): The markdown content. | 
					
						
							|  |  |  |         start_line (str): The string content of the start line from where extraction begins. | 
					
						
							|  |  |  |         end_line (str): The string content of the end line where extraction ends. | 
					
						
							|  |  |  |         heading_level (int): The number of levels to adjust the headings by. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Returns: | 
					
						
							|  |  |  |         extracted_content (str): Extracted markdown content as a string. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     extracted_content = [] | 
					
						
							|  |  |  |     lines = content.splitlines(True) | 
					
						
							|  |  |  |     extracting = start_line is None | 
					
						
							|  |  |  |     for line in lines: | 
					
						
							|  |  |  |         if not extracting and start_line and start_line in line: | 
					
						
							|  |  |  |             extracting = True | 
					
						
							|  |  |  |             extracted_content.append( | 
					
						
							|  |  |  |                 adjust_heading_levels(line, heading_level) | 
					
						
							|  |  |  |             )  # Include start line in output | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if extracting and end_line and end_line in line: | 
					
						
							|  |  |  |             extracting = False | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         if extracting: | 
					
						
							|  |  |  |             extracted_content.append(adjust_heading_levels(line, heading_level)) | 
					
						
							|  |  |  |     return "".join(extracted_content) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def main(): | 
					
						
							|  |  |  |     """Main function to run the extraction of the markdown content.""" | 
					
						
							|  |  |  |     parser = argparse.ArgumentParser( | 
					
						
							|  |  |  |         description="Extract a part of a markdown string from an input file" | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     group = parser.add_mutually_exclusive_group(required=True) | 
					
						
							|  |  |  |     group.add_argument("--input-file", type=str, help="File to read the markdown content from") | 
					
						
							|  |  |  |     group.add_argument("--input", type=str, help="Markdown content as a string") | 
					
						
							|  |  |  |     group.add_argument( | 
					
						
							|  |  |  |         "--input-stdin", action="store_true", help="Read markdown content from stdin" | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     parser.add_argument( | 
					
						
							|  |  |  |         "--start-line", | 
					
						
							|  |  |  |         type=str, | 
					
						
							|  |  |  |         default=None, | 
					
						
							|  |  |  |         help="Optional. The string content of the start line", | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     parser.add_argument( | 
					
						
							|  |  |  |         "--end-line", type=str, default=None, help="Optional. The string content of the end line" | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     parser.add_argument( | 
					
						
							|  |  |  |         "--output-file", | 
					
						
							|  |  |  |         type=str, | 
					
						
							|  |  |  |         default=None, | 
					
						
							|  |  |  |         help="File to write the extracted markdown content to", | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     parser.add_argument( | 
					
						
							|  |  |  |         "--heading-level", | 
					
						
							|  |  |  |         type=int, | 
					
						
							|  |  |  |         default=0, | 
					
						
							|  |  |  |         help="The number of additional `#` to add to markdown headings or to remove from markdown headings if negative", | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     args = parser.parse_args() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         if args.input_file: | 
					
						
							| 
									
										
										
										
											2025-02-12 21:35:51 +01:00
										 |  |  |             with open(args.input_file, "r", encoding="utf-8", newline=None) as f: | 
					
						
							| 
									
										
										
										
											2025-01-03 00:31:20 +01:00
										 |  |  |                 content = f.read() | 
					
						
							|  |  |  |         elif args.input: | 
					
						
							|  |  |  |             content = args.input | 
					
						
							|  |  |  |         elif args.input_stdin: | 
					
						
							|  |  |  |             content = sys.stdin.read() | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             raise ValueError("No valid input source provided.") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         extracted_content = extract_markdown( | 
					
						
							|  |  |  |             content, args.start_line, args.end_line, args.heading_level | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         if args.output_file: | 
					
						
							|  |  |  |             # Write to file | 
					
						
							| 
									
										
										
										
											2025-02-12 21:35:51 +01:00
										 |  |  |             with open(args.output_file, "w", encoding="utf-8", newline="\n") as f: | 
					
						
							| 
									
										
										
										
											2025-01-03 00:31:20 +01:00
										 |  |  |                 f.write(extracted_content) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             # Write to std output | 
					
						
							|  |  |  |             print(extracted_content) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         print(f"Error during markdown extraction: {e}", file=sys.stderr) | 
					
						
							|  |  |  |         sys.exit(1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()