Files and Sys Module

Reading Files

Always Use Context Manager (with)

A with block closes the file automatically when the block exits, even if an error occurs. This is the modern, safe way.

# ✅ Best way - file automatically closes
with open("data.txt", "r") as file:
    content = file.read()
    print(content)

# ❌ Old way - must manually close (don't do this)
file = open("data.txt", "r")
content = file.read()
file.close()  # Easy to forget!

File Modes

📝
Common Modes

"r" → Read (default)
"w" → Write (overwrites entire file!)
"a" → Append (adds to end)
"x" → Create (fails if exists)
"rb"/"wb" → Binary modes

# Read
with open("data.txt", "r") as f:
    content = f.read()

# Write (overwrites!)
with open("output.txt", "w") as f:
    f.write("Hello, World!")

# Append (adds to end)
with open("log.txt", "a") as f:
    f.write("New entry\n")

Reading Methods

read() - Entire File

with open("data.txt") as f:
    content = f.read()  # Whole file as string

readline() - One Line at a Time

with open("data.txt") as f:
    first = f.readline()   # First line
    second = f.readline()  # Second line

readlines() - All Lines as List

with open("data.txt") as f:
    lines = f.readlines()  # ['line1\n', 'line2\n', ...]

Looping Through Files

💡
Best Practice: Iterate Directly

Most memory efficient - reads one line at a time. Works with huge files!

# Best way - memory efficient
with open("data.txt") as f:
    for line in f:
        print(line, end="")  # Line already has \n

# With line numbers
with open("data.txt") as f:
    for i, line in enumerate(f, start=1):
        print(f"{i}: {line}", end="")

# Strip newlines
with open("data.txt") as f:
    for line in f:
        line = line.strip()  # Remove \n
        print(line)

# Process as list
with open("data.txt") as f:
    lines = [line.strip() for line in f]

Writing Files

write() - Single String

with open("output.txt", "w") as f:
    f.write("Hello\n")
    f.write("World\n")

writelines() - List of Strings

⚠️
writelines() Doesn't Add Newlines

You must include \n yourself!

lines = ["Line 1\n", "Line 2\n", "Line 3\n"]
with open("output.txt", "w") as f:
    f.writelines(lines)

print() - Write with file=

with open("output.txt", "w") as f:
    print("Hello, World!", file=f)
    print("Another line", file=f)

Processing Lines

Splitting

# By delimiter
line = "name,age,city"
parts = line.split(",")  # ['name', 'age', 'city']

# By whitespace (default)
line = "John   25   NYC"
parts = line.split()  # ['John', '25', 'NYC']

# With max splits
line = "a,b,c,d,e"
parts = line.split(",", 2)  # ['a', 'b', 'c,d,e']

Joining

words = ['Hello', 'World']
sentence = " ".join(words)  # "Hello World"

lines = ['line1', 'line2', 'line3']
content = "\n".join(lines)

Processing CSV Data

# Simple comma-separated rows of exactly three fields: name,age,city.
# NOTE: split(",") does not understand quoted fields; use the csv module
# for real-world CSV files.
with open("data.csv") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines - they would fail to unpack into three fields
            continue
        name, age, city = line.split(",")
        print(f"{name} is {age} from {city}")

The sys Module

Command Line Arguments

import sys

print(sys.argv)  # List of all arguments
# python script.py hello world
# Output: ['script.py', 'hello', 'world']

print(sys.argv[0])  # Script name
print(sys.argv[1])  # First argument
print(len(sys.argv))  # Number of arguments (including the script name)

Basic Argument Handling

import sys

if len(sys.argv) < 2:
    print("Usage: python script.py <filename>")
    sys.exit(1)

filename = sys.argv[1]
print(f"Processing: {filename}")

Processing Multiple Arguments

import sys

# python script.py file1.txt file2.txt file3.txt
for filename in sys.argv[1:]:  # Skip script name
    print(f"Processing: {filename}")

Argument Validation

💻
Complete Template

Validation pattern for command-line scripts

import sys
import os

def main():
    """Validate the command line, then hand both paths to process().

    Expects exactly two arguments: an existing input path and an output
    path. Exits with status 1 on a usage error or a missing input, and with
    status 0 if the user declines to overwrite an existing output.
    """
    # Check argument count (sys.argv[0] is the script name)
    if len(sys.argv) != 3:
        print("Usage: python script.py <input> <output>", file=sys.stderr)
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    # Check if input exists; diagnostics go to stderr so they never mix
    # with the program's real output
    if not os.path.exists(input_file):
        print(f"Error: {input_file} not found", file=sys.stderr)
        sys.exit(1)

    # Check if output exists - never overwrite without asking
    if os.path.exists(output_file):
        response = input(f"{output_file} exists. Overwrite? (y/n): ")
        # Tolerate surrounding whitespace and upper case (" Y ")
        if response.strip().lower() != 'y':
            print("Aborted")
            sys.exit(0)

    # Process files
    process(input_file, output_file)

if __name__ == "__main__":
    main()

Standard Streams

stdin, stdout, stderr

import sys

# Read from stdin
line = sys.stdin.readline()

# Write to stdout (like print)
sys.stdout.write("Hello\n")

# Write to stderr (for errors)
sys.stderr.write("Error: failed\n")

Reading from Pipe

# In terminal
cat data.txt | python script.py
echo "Hello" | python script.py

# script.py
import sys

for line in sys.stdin:
    print(f"Received: {line.strip()}")

Exit Codes

📝
Convention

0 → Success
1 → General error
2 → Command line error

import sys

# Exit with success
sys.exit(0)

# Exit with error
sys.exit(1)

# Exit with message
sys.exit("Error: something went wrong")

Useful sys Attributes

import sys

# Python version
print(sys.version)         # '3.10.0 (default, ...)'
print(sys.version_info)    # sys.version_info(major=3, ...)

# Platform
print(sys.platform)        # 'linux', 'darwin', 'win32'

# Module search paths
print(sys.path)

# Largest container size/index (Python ints themselves have no maximum)
print(sys.maxsize)

# Default encoding
print(sys.getdefaultencoding())  # 'utf-8'

Building Command Line Tools

Simple Script Template

#!/usr/bin/env python3
"""Simple command line tool."""

import sys
import os

def print_usage():
    """Print the tool's usage line and its supported options to stdout."""
    usage_lines = (
        "Usage: python tool.py <input_file>",
        "Options:",
        "  -h, --help    Show help",
        "  -v, --verbose Verbose output",
    )
    for text in usage_lines:
        print(text)

def main():
    """Entry point: print each line of the input file, whitespace-stripped.

    Command line: ``tool.py [-v|--verbose] <input_file>``. ``-h``/``--help``
    anywhere on the command line prints the usage text and exits with
    status 0. Running with no arguments prints the usage text and exits
    with status 1. A missing, nonexistent, or non-file input is reported on
    stderr and exits with status 1.
    """
    args = sys.argv[1:]

    # An explicit help request succeeds wherever the flag appears.
    if '-h' in args or '--help' in args:
        print_usage()
        sys.exit(0)

    # No arguments at all is a usage error, so exit non-zero.
    if not args:
        print_usage()
        sys.exit(1)

    verbose = '-v' in args or '--verbose' in args

    # The input file is the first argument that is not an option flag.
    input_file = next((arg for arg in args if not arg.startswith('-')), None)

    if not input_file:
        print("Error: No input file", file=sys.stderr)
        sys.exit(1)

    if not os.path.exists(input_file):
        print(f"Error: {input_file} not found", file=sys.stderr)
        sys.exit(1)

    # A directory passes os.path.exists() but cannot be opened for reading.
    if not os.path.isfile(input_file):
        print(f"Error: {input_file} is not a file", file=sys.stderr)
        sys.exit(1)

    # Process
    if verbose:
        print(f"Processing {input_file}...")

    with open(input_file) as f:
        for line in f:
            print(line.strip())

    if verbose:
        print("Done!")

if __name__ == "__main__":
    main()

Word Count Tool

💻
Example: wc Clone

Count lines, words, and characters

#!/usr/bin/env python3
import sys

def count_file(filename):
    """Count the lines, words, and characters in a text file.

    Words are whitespace-separated tokens. The character count is the sum
    of len() over every line as read, so line endings are included.

    Returns:
        A (lines, words, chars) tuple of ints; (0, 0, 0) for an empty file.
    """
    line_total = word_total = char_total = 0
    with open(filename) as handle:
        # enumerate leaves line_total at the number of the last line read
        for line_total, text in enumerate(handle, start=1):
            word_total += len(text.split())
            char_total += len(text)
    return line_total, word_total, char_total

def main():
    """Print line, word, and character counts for each file argument.

    Prints one row per readable file, plus a "total" row when more than one
    file was named. A file that cannot be read is reported on stderr and
    skipped; after the remaining files have been processed the program
    exits with status 1 so callers can detect the failure.
    """
    if len(sys.argv) < 2:
        print("Usage: python wc.py <file1> [file2] ...", file=sys.stderr)
        sys.exit(1)

    total_l = total_w = total_c = 0
    failed = False

    for filename in sys.argv[1:]:
        # Keep the try body to the one call that can raise.
        try:
            n_lines, n_words, n_chars = count_file(filename)
        except FileNotFoundError:
            print(f"Error: {filename} not found", file=sys.stderr)
            failed = True
            continue
        except OSError as exc:
            # e.g. the argument is a directory or is not readable
            print(f"Error: cannot read {filename}: {exc.strerror or exc}",
                  file=sys.stderr)
            failed = True
            continue

        print(f"{n_lines:8} {n_words:8} {n_chars:8} {filename}")
        total_l += n_lines
        total_w += n_words
        total_c += n_chars

    if len(sys.argv) > 2:
        print(f"{total_l:8} {total_w:8} {total_c:8} total")

    if failed:
        sys.exit(1)

if __name__ == "__main__":
    main()

FASTA Sequence Counter

#!/usr/bin/env python3
import sys

def process_fasta(filename):
    """Count the header lines and sequence characters in a FASTA file.

    Each line is stripped of surrounding whitespace first. A line that then
    starts with ">" counts as one sequence; the length of every other line
    is added to the base total.

    Returns:
        A (sequences, total_bases) tuple of ints.
    """
    header_count = 0
    base_count = 0

    with open(filename) as handle:
        for raw in handle:
            text = raw.strip()
            if not text.startswith(">"):
                base_count += len(text)
                continue
            header_count += 1

    return header_count, base_count

def main():
    """Report the sequence count, total bases, and mean length of a FASTA file.

    Expects exactly one argument, the FASTA path. Exits with status 1 on a
    usage error or when the file does not exist.
    """
    if len(sys.argv) != 2:
        print("Usage: python fasta_count.py <file.fasta>", file=sys.stderr)
        sys.exit(1)

    filename = sys.argv[1]

    # Only the file access can raise FileNotFoundError, so keep the try small.
    try:
        seqs, bases = process_fasta(filename)
    except FileNotFoundError:
        print(f"Error: {filename} not found", file=sys.stderr)
        sys.exit(1)

    print(f"Sequences: {seqs}")
    print(f"Total bases: {bases}")
    # A file with no ">" header lines has zero sequences; avoid dividing by 0.
    average = bases / seqs if seqs else 0.0
    print(f"Average: {average:.1f}")

if __name__ == "__main__":
    main()

File Path Operations

import os

# Join paths (cross-platform)
path = os.path.join("folder", "subfolder", "file.txt")

# Get filename
os.path.basename("/path/to/file.txt")  # "file.txt"

# Get directory
os.path.dirname("/path/to/file.txt")   # "/path/to"

# Split extension
name, ext = os.path.splitext("data.txt")  # "data", ".txt"

# Check existence
os.path.exists("file.txt")    # True/False
os.path.isfile("file.txt")    # True if file
os.path.isdir("folder")       # True if directory

# Get file size
os.path.getsize("file.txt")   # Size in bytes

# Get absolute path
os.path.abspath("file.txt")

Practice Exercises

💻
Basic File Operations

1. Read file and print with line numbers
2. Count lines in a file
3. Copy file contents (use sys.argv)
4. Parse and format CSV rows
5. Reverse file contents

💻
Command Line Tools

6. Search for word and print matching lines
7. Read stdin, write stdout in uppercase
8. Validate arguments (file must exist)
9. Word frequency counter (top 10 words)
10. Parse FASTA (extract names and lengths)

💻
Advanced Tools

11. Merge multiple files into one
12. Remove blank lines from file
13. Convert file to uppercase
14. Log analyzer (count ERROR/WARNING/INFO)
15. Build grep-like tool: python grep.py <pattern> <file>


Quick Reference

📝
Essential Commands

with open(file) as f: → Open safely
f.read() → Read all
for line in f: → Iterate lines
f.write(string) → Write
sys.argv → Get arguments
sys.exit(code) → Exit program
print(..., file=sys.stderr) → Error output
os.path.exists(file) → Check file
os.path.join(a, b) → Join paths


Best Practices

Follow These Rules

1. Always use with for files
2. Validate command line arguments
3. Handle missing files gracefully
4. Use sys.exit(1) for errors
5. Write errors to stderr
6. Use os.path for cross-platform paths


Solution Hints

💡
Exercise 1: Line Numbers

Use enumerate(f, start=1) when iterating

💡
Exercise 6: Search Tool

For each line, check: if word in line

💡
Exercise 9: Word Frequency

Use from collections import Counter and .most_common(10)

💡
Exercise 15: Grep Tool

Use re.search(pattern, line) for pattern matching