Reputation: 69
I'm performing ETL on a file with Python.
However, I only have a small template file; the real file will be larger than 20 GB.
How can I expand this small file into a big one? The only thing I can do is repeat its lines.
0|18033552000161|032021|PAR_200|21659151780|0|0|C-200|07032021|7252048000136|2||AE370020085702004652088|0|6|11:04:33|1096.14|1||SP|||1048485455|1048485455
1|18033552000161|032022|PAR_200|21650311633|0|0|C-200|07032022|7252048000136|5||CZ1900136063002100728667|0|4|11:04:33|3835.44|1||MG|||56047633650|56047633650
2|18033552000161|032023|PAR_200|21653803883|0|0|C-200|07032023|7252048000136|7||NO2010954004040|0|2|11:04:33|2207.90|1||RJ|||13680102704|13680102704
3|18033552000161|032024|PAR_200|21651787399|0|0|C-200|07032024|7252048000136|11||BE23698700576689|0|2|11:04:33|2752.31|1||SP|||13444926731|13444926731
4|18033552000161|032025|PAR_200|21655452859|0|0|C-200|07032025|7252048000136|10||MU52STRM9362758860940050637YTD|0|7|11:04:33|389.10|1||MG|||40145813657|40145813657
5|18033552000161|032026|PAR_200|21654162541|0|0|C-200|07032026|7252048000136|0||AL154065638669490R4EO0ATK790|0|8|11:04:33|1295.54|1||MG|||3833577800|3833577800
6|18033552000161|032027|PAR_200|21650154230|0|0|C-200|07032027|7252048000136|9||CY063005789014W8962038L23033|0|99|11:04:33|682.84|1||MG|||56171794112|56171794112
7|18033552000161|032028|PAR_200|21653758532|0|0|C-200|07032028|7252048000136|12||TN1140020006380769002385|0|7|11:04:33|4859.10|1||SP|||44457199605|44457199605
8|18033552000161|032029|PAR_200|21650944307|0|0|C-200|07032029|7252048000136|13||MK11604E9J413L98997|0|8|11:04:33|4764.07|1||MG|||34072364096|34072364096
9|18033552000161|032030|PAR_200|21655982923|0|0|C-200|07032030|7252048000136|14||MR4814700801110012154008003|0|99|11:04:33|4180.82|1||MG|||70262241749|70262241749
10|18033552000161|032031|PAR_200|21653346587|0|0|C-200|07032031|7252048000136|1||LI1600081181694N2K346|0|6|11:04:33|1013.84|1||MG|||17651232321|17651232321
11|18033552000161|032032|PAR_200|21652146638|0|0|C-200|07032032|7252048000136|15||HR1850432373024052004|0|7|11:04:33|3893.96|1||MG|||48517833376|48517833376
12|18033552000161|032033|PAR_200|21657570797|0|0|C-200|07032033|7252048000136|17||HR6560061198829650702|0|5|11:04:33|4639.03|1||RJ|||17073431016|17073431016
13|18033552000161|032034|PAR_200|21650803507|0|0|C-200|07032034|7252048000136|18||PS626486200690492007606208242|0|1|11:04:33|3566.18|1||SP|||31163748480|31163748480
14|18033552000161|032035|PAR_200|21653741119|0|0|C-200|07032035|7252048000136|6||BR7700547510010819283490392W3|0|9|11:04:33|4141.09|1||RJ|||37647368443|37647368443
Upvotes: 0
Views: 359
Reputation: 540
This will take your input lines and repeat them as-is, in the same order, over and over again (renumbering only the first column) until the output reaches 20 GB. Building the whole output in memory before writing it obviously requires about 20 GB of RAM. Alternatively, you could write to the file in stages so that you need far less memory, or simply choose a smaller size.
# Template rows to repeat. The first "|"-separated field of each row is a row
# index; it is replaced by a running counter in the generated file.
TEMPLATE_LINES = """0|18033552000161|032021|PAR_200|21659151780|0|0|C-200|07032021|7252048000136|2||AE370020085702004652088|0|6|11:04:33|1096.14|1||SP|||1048485455|1048485455
1|18033552000161|032022|PAR_200|21650311633|0|0|C-200|07032022|7252048000136|5||CZ1900136063002100728667|0|4|11:04:33|3835.44|1||MG|||56047633650|56047633650
2|18033552000161|032023|PAR_200|21653803883|0|0|C-200|07032023|7252048000136|7||NO2010954004040|0|2|11:04:33|2207.90|1||RJ|||13680102704|13680102704
3|18033552000161|032024|PAR_200|21651787399|0|0|C-200|07032024|7252048000136|11||BE23698700576689|0|2|11:04:33|2752.31|1||SP|||13444926731|13444926731
4|18033552000161|032025|PAR_200|21655452859|0|0|C-200|07032025|7252048000136|10||MU52STRM9362758860940050637YTD|0|7|11:04:33|389.10|1||MG|||40145813657|40145813657
5|18033552000161|032026|PAR_200|21654162541|0|0|C-200|07032026|7252048000136|0||AL154065638669490R4EO0ATK790|0|8|11:04:33|1295.54|1||MG|||3833577800|3833577800
6|18033552000161|032027|PAR_200|21650154230|0|0|C-200|07032027|7252048000136|9||CY063005789014W8962038L23033|0|99|11:04:33|682.84|1||MG|||56171794112|56171794112
7|18033552000161|032028|PAR_200|21653758532|0|0|C-200|07032028|7252048000136|12||TN1140020006380769002385|0|7|11:04:33|4859.10|1||SP|||44457199605|44457199605
8|18033552000161|032029|PAR_200|21650944307|0|0|C-200|07032029|7252048000136|13||MK11604E9J413L98997|0|8|11:04:33|4764.07|1||MG|||34072364096|34072364096
9|18033552000161|032030|PAR_200|21655982923|0|0|C-200|07032030|7252048000136|14||MR4814700801110012154008003|0|99|11:04:33|4180.82|1||MG|||70262241749|70262241749
10|18033552000161|032031|PAR_200|21653346587|0|0|C-200|07032031|7252048000136|1||LI1600081181694N2K346|0|6|11:04:33|1013.84|1||MG|||17651232321|17651232321
11|18033552000161|032032|PAR_200|21652146638|0|0|C-200|07032032|7252048000136|15||HR1850432373024052004|0|7|11:04:33|3893.96|1||MG|||48517833376|48517833376
12|18033552000161|032033|PAR_200|21657570797|0|0|C-200|07032033|7252048000136|17||HR6560061198829650702|0|5|11:04:33|4639.03|1||RJ|||17073431016|17073431016
13|18033552000161|032034|PAR_200|21650803507|0|0|C-200|07032034|7252048000136|18||PS626486200690492007606208242|0|1|11:04:33|3566.18|1||SP|||31163748480|31163748480
14|18033552000161|032035|PAR_200|21653741119|0|0|C-200|07032035|7252048000136|6||BR7700547510010819283490392W3|0|9|11:04:33|4141.09|1||RJ|||37647368443|37647368443""".split("\n")


def generate_big_file(
    output_file="some_big_file.txt",
    num_bytes_size=20 * 1000 * 1000 * 1000,  # 20 gigs
    template_lines=TEMPLATE_LINES,
):
    """Write a large test file by cycling through the template rows.

    Rows are emitted in template order, over and over, with the first
    "|"-separated field replaced by a running 0-based counter. Rows are
    streamed straight to disk, so memory use stays constant regardless of
    the requested size.

    Args:
        output_file: Path of the file to create (overwritten if it exists).
        num_bytes_size: Minimum size of the output in bytes. Generation stops
            after the first row that makes the file reach or exceed this
            size, so the file may overshoot by less than one row.
        template_lines: Iterable of "|"-separated rows to repeat.

    Returns:
        The number of rows written.

    Raises:
        ValueError: If ``template_lines`` is empty.
    """
    # Pre-encode everything after the first field once; only the counter
    # prefix changes from row to row.
    row_tails = [
        ("|" + line.partition("|")[2] + "\n").encode("utf-8")
        for line in template_lines
    ]
    if not row_tails:
        raise ValueError("template_lines must contain at least one row")

    template_count = len(row_tails)
    bytes_written = 0
    counter = 0
    # Binary mode makes the size check count real bytes on disk and keeps
    # "\n" line endings on every platform; the large buffer batches the many
    # small row writes.
    with open(output_file, "wb", buffering=1024 * 1024) as f:
        while bytes_written < num_bytes_size:
            row = str(counter).encode("ascii") + row_tails[counter % template_count]
            f.write(row)
            bytes_written += len(row)
            counter += 1
    return counter


if __name__ == "__main__":
    generate_big_file()
Example output tail:
99|18033552000161|032030|PAR_200|21655982923|0|0|C-200|07032030|7252048000136|14||MR4814700801110012154008003|0|99|11:04:33|4180.82|1||MG|||70262241749|70262241749
100|18033552000161|032031|PAR_200|21653346587|0|0|C-200|07032031|7252048000136|1||LI1600081181694N2K346|0|6|11:04:33|1013.84|1||MG|||17651232321|17651232321
101|18033552000161|032032|PAR_200|21652146638|0|0|C-200|07032032|7252048000136|15||HR1850432373024052004|0|7|11:04:33|3893.96|1||MG|||48517833376|48517833376
102|18033552000161|032033|PAR_200|21657570797|0|0|C-200|07032033|7252048000136|17||HR6560061198829650702|0|5|11:04:33|4639.03|1||RJ|||17073431016|17073431016
103|18033552000161|032034|PAR_200|21650803507|0|0|C-200|07032034|7252048000136|18||PS626486200690492007606208242|0|1|11:04:33|3566.18|1||SP|||31163748480|31163748480
104|18033552000161|032035|PAR_200|21653741119|0|0|C-200|07032035|7252048000136|6||BR7700547510010819283490392W3|0|9|11:04:33|4141.09|1||RJ|||37647368443|37647368443
105|18033552000161|032021|PAR_200|21659151780|0|0|C-200|07032021|7252048000136|2||AE370020085702004652088|0|6|11:04:33|1096.14|1||SP|||1048485455|1048485455
106|18033552000161|032022|PAR_200|21650311633|0|0|C-200|07032022|7252048000136|5||CZ1900136063002100728667|0|4|11:04:33|3835.44|1||MG|||56047633650|56047633650
107|18033552000161|032023|PAR_200|21653803883|0|0|C-200|07032023|7252048000136|7||NO2010954004040|0|2|11:04:33|2207.90|1||RJ|||13680102704|13680102704
108|18033552000161|032024|PAR_200|21651787399|0|0|C-200|07032024|7252048000136|11||BE23698700576689|0|2|11:04:33|2752.31|1||SP|||13444926731|13444926731
109|18033552000161|032025|PAR_200|21655452859|0|0|C-200|07032025|7252048000136|10||MU52STRM9362758860940050637YTD|0|7|11:04:33|389.10|1||MG|||40145813657|40145813657
110|18033552000161|032026|PAR_200|21654162541|0|0|C-200|07032026|7252048000136|0||AL154065638669490R4EO0ATK790|0|8|11:04:33|1295.54|1||MG|||3833577800|3833577800
111|18033552000161|032027|PAR_200|21650154230|0|0|C-200|07032027|7252048000136|9||CY063005789014W8962038L23033|0|99|11:04:33|682.84|1||MG|||56171794112|56171794112
112|18033552000161|032028|PAR_200|21653758532|0|0|C-200|07032028|7252048000136|12||TN1140020006380769002385|0|7|11:04:33|4859.10|1||SP|||44457199605|44457199605
113|18033552000161|032029|PAR_200|21650944307|0|0|C-200|07032029|7252048000136|13||MK11604E9J413L98997|0|8|11:04:33|4764.07|1||MG|||34072364096|34072364096
114|18033552000161|032030|PAR_200|21655982923|0|0|C-200|07032030|7252048000136|14||MR4814700801110012154008003|0|99|11:04:33|4180.82|1||MG|||70262241749|70262241749
115|18033552000161|032031|PAR_200|21653346587|0|0|C-200|07032031|7252048000136|1||LI1600081181694N2K346|0|6|11:04:33|1013.84|1||MG|||17651232321|17651232321
116|18033552000161|032032|PAR_200|21652146638|0|0|C-200|07032032|7252048000136|15||HR1850432373024052004|0|7|11:04:33|3893.96|1||MG|||48517833376|48517833376
117|18033552000161|032033|PAR_200|21657570797|0|0|C-200|07032033|7252048000136|17||HR6560061198829650702|0|5|11:04:33|4639.03|1||RJ|||17073431016|17073431016
118|18033552000161|032034|PAR_200|21650803507|0|0|C-200|07032034|7252048000136|18||PS626486200690492007606208242|0|1|11:04:33|3566.18|1||SP|||31163748480|31163748480
119|18033552000161|032035|PAR_200|21653741119|0|0|C-200|07032035|7252048000136|6||BR7700547510010819283490392W3|0|9|11:04:33|4141.09|1||RJ|||37647368443|37647368443
120|18033552000161|032021|PAR_200|21659151780|0|0|C-200|07032021|7252048000136|2||AE370020085702004652088|0|6|11:04:33|1096.14|1||SP|||1048485455|1048485455
121|18033552000161|032022|PAR_200|21650311633|0|0|C-200|07032022|7252048000136|5||CZ1900136063002100728667|0|4|11:04:33|3835.44|1||MG|||56047633650|56047633650
122|18033552000161|032023|PAR_200|21653803883|0|0|C-200|07032023|7252048000136|7||NO2010954004040|0|2|11:04:33|2207.90|1||RJ|||13680102704|13680102704
123|18033552000161|032024|PAR_200|21651787399|0|0|C-200|07032024|7252048000136|11||BE23698700576689|0|2|11:04:33|2752.31|1||SP|||13444926731|13444926731
124|18033552000161|032025|PAR_200|21655452859|0|0|C-200|07032025|7252048000136|10||MU52STRM9362758860940050637YTD|0|7|11:04:33|389.10|1||MG|||40145813657|40145813657
125|18033552000161|032026|PAR_200|21654162541|0|0|C-200|07032026|7252048000136|0||AL154065638669490R4EO0ATK790|0|8|11:04:33|1295.54|1||MG|||3833577800|3833577800
Upvotes: 1