Files and Sys Module
Reading Files
Files opened with a with statement close automatically, even if errors occur. This is the modern, safe way.
# ✅ Best way - file automatically closes
with open("data.txt", "r") as file:
    content = file.read()
    print(content)
# ❌ Old way - must manually close (don't do this)
file = open("data.txt", "r")
content = file.read()
file.close()  # Easy to forget!
File Modes
"r" → Read (default)
"w" → Write (overwrites entire file!)
"a" → Append (adds to end)
"x" → Create (fails if exists)
"rb"/"wb" → Binary modes
# Read
with open("data.txt", "r") as f:
    content = f.read()
# Write (overwrites!)
with open("output.txt", "w") as f:
    f.write("Hello, World!")
# Append (adds to end)
with open("log.txt", "a") as f:
    f.write("New entry\n")
Reading Methods
read() - Entire File
with open("data.txt") as f:
    content = f.read()  # Whole file as string
readline() - One Line at a Time
with open("data.txt") as f:
    first = f.readline()  # First line
    second = f.readline()  # Second line
readlines() - All Lines as List
with open("data.txt") as f:
    lines = f.readlines()  # ['line1\n', 'line2\n', ...]
Looping Through Files
Iterating directly over the file object is the most memory-efficient approach: it reads one line at a time, so it works even with huge files!
# Best way - memory efficient
with open("data.txt") as f:
    for line in f:
        print(line, end="")  # Line already has \n
# With line numbers
with open("data.txt") as f:
    for i, line in enumerate(f, start=1):
        print(f"{i}: {line}", end="")
# Strip newlines
with open("data.txt") as f:
    for line in f:
        # strip() removes the \n and any other leading/trailing whitespace
        line = line.strip()
        print(line)
# Process as list
with open("data.txt") as f:
    lines = [line.strip() for line in f]
Writing Files
write() - Single String
with open("output.txt", "w") as f:
    f.write("Hello\n")
    f.write("World\n")
writelines() - List of Strings
writelines() does not add line endings - you must include \n yourself!
lines = ["Line 1\n", "Line 2\n", "Line 3\n"]
with open("output.txt", "w") as f:
    f.writelines(lines)
print() to File
with open("output.txt", "w") as f:
    print("Hello, World!", file=f)
    print("Another line", file=f)
Processing Lines
Splitting
# By delimiter
line = "name,age,city"
parts = line.split(",") # ['name', 'age', 'city']
# By whitespace (default)
line = "John 25 NYC"
parts = line.split() # ['John', '25', 'NYC']
# With max splits
line = "a,b,c,d,e"
parts = line.split(",", 2) # ['a', 'b', 'c,d,e']
Joining
words = ['Hello', 'World']
sentence = " ".join(words) # "Hello World"
lines = ['line1', 'line2', 'line3']
content = "\n".join(lines)
Processing CSV Data
# NOTE: assumes every line has exactly three comma-separated fields; a blank
# line or a different field count raises ValueError on unpacking. For quoted
# fields or embedded commas, use the standard-library csv module instead.
with open("data.csv") as f:
    for line in f:
        parts = line.strip().split(",")
        name, age, city = parts
        print(f"{name} is {age} from {city}")
The sys Module
Command Line Arguments
import sys
print(sys.argv) # List of all arguments
# python script.py hello world
# Output: ['script.py', 'hello', 'world']
print(sys.argv[0]) # Script name
print(sys.argv[1]) # First argument
print(len(sys.argv)) # Number of arguments (script name included)
Basic Argument Handling
import sys
# Require at least one argument after the script name
if len(sys.argv) < 2:
    print("Usage: python script.py <filename>")
    sys.exit(1)
filename = sys.argv[1]
print(f"Processing: {filename}")
Processing Multiple Arguments
import sys
# python script.py file1.txt file2.txt file3.txt
for filename in sys.argv[1:]:  # Skip script name
    print(f"Processing: {filename}")
Argument Validation
Validation pattern for command-line scripts
import sys
import os
def main():
    """Validate the command-line arguments, then hand off to ``process``.

    Expects exactly two arguments: an input path and an output path.
    Exits with status 1 on a wrong argument count or a missing input file,
    and with status 0 if the user declines to overwrite an existing output.

    NOTE: ``process`` is not defined in this snippet; it stands in for your
    own function taking ``(input_file, output_file)``.
    """
    # Check argument count
    if len(sys.argv) != 3:
        print("Usage: python script.py <input> <output>")
        sys.exit(1)
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    # Check if input exists (errors go to stderr so they don't mix with output)
    if not os.path.exists(input_file):
        print(f"Error: {input_file} not found", file=sys.stderr)
        sys.exit(1)
    # Check if output exists
    if os.path.exists(output_file):
        response = input(f"{output_file} exists. Overwrite? (y/n): ")
        # Tolerate stray whitespace around the answer, e.g. "y " or " Y"
        if response.strip().lower() != 'y':
            print("Aborted")
            sys.exit(0)
    # Process files
    process(input_file, output_file)


if __name__ == "__main__":
    main()
Standard Streams
stdin, stdout, stderr
import sys
# Read from stdin
line = sys.stdin.readline()
# Write to stdout (like print)
sys.stdout.write("Hello\n")
# Write to stderr (for errors)
sys.stderr.write("Error: failed\n")
Reading from Pipe
# In terminal
cat data.txt | python script.py
echo "Hello" | python script.py
# script.py
import sys
# Echo every line piped in on standard input
for line in sys.stdin:
    print(f"Received: {line.strip()}")
Exit Codes
0 → Success
1 → General error
2 → Command line error
import sys
# Exit with success
sys.exit(0)
# Exit with error
sys.exit(1)
# Exit with message (the message is printed to stderr and the exit code is 1)
sys.exit("Error: something went wrong")
Useful sys Attributes
import sys
# Python version
print(sys.version) # '3.10.0 (default, ...)'
print(sys.version_info) # sys.version_info(major=3, ...)
# Platform
print(sys.platform) # 'linux', 'darwin', 'win32'
# Module search paths
print(sys.path)
# Largest container size / index (Py_ssize_t max) - Python ints themselves are unbounded
print(sys.maxsize)
# Default string encoding (used by str.encode(); not necessarily the default for open())
print(sys.getdefaultencoding()) # 'utf-8'
Building Command Line Tools
Simple Script Template
#!/usr/bin/env python3
"""Simple command line tool."""
import sys
import os
def print_usage():
    """Print the usage line and the supported options to stdout."""
    print("Usage: python tool.py <input_file>")
    print("Options:")
    print(" -h, --help Show help")
    print(" -v, --verbose Verbose output")
def main():
    """Print every line of the input file with surrounding whitespace stripped.

    The input file is the first command-line argument that does not start
    with ``-``. ``-v``/``--verbose`` adds progress messages.

    Exit status: 0 after showing help, 1 when no arguments are given, when
    no input file is named, or when the input file does not exist.
    """
    args = sys.argv[1:]
    # Parse arguments: honour the help flag wherever it appears
    if '-h' in args or '--help' in args:
        print_usage()
        sys.exit(0)
    if not args:
        # Running with no arguments is a usage error, so report failure
        print_usage()
        sys.exit(1)
    verbose = '-v' in args or '--verbose' in args
    # Get input file: the first argument that is not an option flag
    input_file = next((arg for arg in args if not arg.startswith('-')), None)
    if not input_file:
        print("Error: No input file", file=sys.stderr)
        sys.exit(1)
    if not os.path.exists(input_file):
        print(f"Error: {input_file} not found", file=sys.stderr)
        sys.exit(1)
    # Process
    if verbose:
        print(f"Processing {input_file}...")
    with open(input_file) as f:
        for line in f:
            print(line.strip())
    if verbose:
        print("Done!")


if __name__ == "__main__":
    main()
Word Count Tool
Count lines, words, and characters
#!/usr/bin/env python3
import sys
def count_file(filename):
    """Count the lines, words, and characters in a text file.

    Words are whitespace-separated tokens; characters include each line's
    trailing newline. The file is read one line at a time.

    Returns:
        A ``(lines, words, chars)`` tuple of ints; ``(0, 0, 0)`` for an
        empty file.

    Raises:
        OSError: if the file cannot be opened (e.g. FileNotFoundError).
    """
    lines = words = chars = 0
    with open(filename) as f:
        # enumerate leaves `lines` at the last line number (0 if file is empty)
        for lines, line in enumerate(f, start=1):
            words += len(line.split())
            chars += len(line)
    return lines, words, chars
def main():
    """Print line/word/character counts for each file named on the command line.

    A ``total`` row is printed when more than one file is given. Files that
    cannot be read are reported on stderr and skipped; the script then exits
    with status 1 so callers can detect the partial failure. Also exits
    with status 1 when no file is given.
    """
    if len(sys.argv) < 2:
        print("Usage: python wc.py <file1> [file2] ...")
        sys.exit(1)
    total_l = total_w = total_c = 0
    failed = False
    for filename in sys.argv[1:]:
        # Keep the try body minimal: only the call that can raise
        try:
            n_lines, n_words, n_chars = count_file(filename)
        except FileNotFoundError:
            print(f"Error: {filename} not found", file=sys.stderr)
            failed = True
        except OSError as err:
            # e.g. a directory or a file without read permission
            print(f"Error: cannot read {filename}: {err}", file=sys.stderr)
            failed = True
        else:
            print(f"{n_lines:8} {n_words:8} {n_chars:8} {filename}")
            total_l += n_lines
            total_w += n_words
            total_c += n_chars
    if len(sys.argv) > 2:
        print(f"{total_l:8} {total_w:8} {total_c:8} total")
    if failed:
        sys.exit(1)


if __name__ == "__main__":
    main()
FASTA Sequence Counter
#!/usr/bin/env python3
import sys
def process_fasta(filename):
    """Count the sequences and total bases in a FASTA file.

    Every line starting with ``>`` counts as one sequence header; the
    stripped length of every other line is added to the base total.

    Returns:
        A ``(sequences, total_bases)`` tuple of ints.

    Raises:
        OSError: if the file cannot be opened (e.g. FileNotFoundError).
    """
    sequences = 0
    total_bases = 0
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if line.startswith(">"):
                sequences += 1
            else:
                total_bases += len(line)
    return sequences, total_bases
def main():
    """Report sequence count, total bases, and average length for a FASTA file.

    Expects exactly one argument, the FASTA file path. Exits with status 1
    on a wrong argument count or when the file does not exist.
    """
    if len(sys.argv) != 2:
        print("Usage: python fasta_count.py <file.fasta>")
        sys.exit(1)
    filename = sys.argv[1]
    # Keep the try body minimal: only the call that can raise
    try:
        seqs, bases = process_fasta(filename)
    except FileNotFoundError:
        print(f"Error: {filename} not found", file=sys.stderr)
        sys.exit(1)
    print(f"Sequences: {seqs}")
    print(f"Total bases: {bases}")
    # A file with no ">" headers has zero sequences; avoid ZeroDivisionError
    if seqs:
        print(f"Average: {bases/seqs:.1f}")
    else:
        print("Average: n/a (no sequences found)")


if __name__ == "__main__":
    main()
File Path Operations
import os
# Join paths (cross-platform)
path = os.path.join("folder", "subfolder", "file.txt")
# Get filename
os.path.basename("/path/to/file.txt") # "file.txt"
# Get directory
os.path.dirname("/path/to/file.txt") # "/path/to"
# Split extension
name, ext = os.path.splitext("data.txt") # "data", ".txt"
# Check existence
os.path.exists("file.txt") # True/False
os.path.isfile("file.txt") # True if file
os.path.isdir("folder") # True if directory
# Get file size
os.path.getsize("file.txt") # Size in bytes
# Get absolute path
os.path.abspath("file.txt")
Practice Exercises
1. Read file and print with line numbers
2. Count lines in a file
3. Copy file contents (use sys.argv)
4. Parse and format CSV rows
5. Reverse file contents
6. Search for word and print matching lines
7. Read stdin, write stdout in uppercase
8. Validate arguments (file must exist)
9. Word frequency counter (top 10 words)
10. Parse FASTA (extract names and lengths)
11. Merge multiple files into one
12. Remove blank lines from file
13. Convert file to uppercase
14. Log analyzer (count ERROR/WARNING/INFO)
15. Build grep-like tool: python grep.py <pattern> <file>
Quick Reference
with open(file) as f: → Open safely
f.read() → Read all
for line in f: → Iterate lines
f.write(string) → Write
sys.argv → Get arguments
sys.exit(code) → Exit program
print(..., file=sys.stderr) → Error output
os.path.exists(file) → Check file
os.path.join(a, b) → Join paths
Best Practices
1. Always use with for files
2. Validate command line arguments
3. Handle missing files gracefully
4. Use sys.exit(1) for errors
5. Write errors to stderr
6. Use os.path for cross-platform paths
Solution Hints
Use enumerate(f, start=1) when iterating
Check `if word in line:` for each line
Use from collections import Counter and .most_common(10)
Use re.search(pattern, line) for pattern matching