read_cabrillo
This is how to load a Cabrillo file for analysis.
```python
import pandas as pd
from dataclasses import dataclass


@dataclass
class CabRecord:
    freq: float
    mode: str
    when: str
    mycall: str
    myrst: str
    myexch: str
    dxcall: str
    dxrst: str
    dxexch: str
    txnum: int


def read_cab(filename):
    with open(filename, 'rt') as ifp:
        data = ifp.read().split('\n')
    return data


def only_qso(data):
    qso_data = [a for a in data if a.startswith('QSO')]
    return qso_data


def make_records(qso_data):
    """
                                  --------info sent------- -------info rcvd--------
    QSO: freq  mo date       time call          rst exch   call          rst exch   t
    QSO: ***** ** yyyy-mm-dd nnnn ************* nnn ****** ************* nnn ****** n
    QSO:  3799 PH 1999-03-06 0711 HC8N          59  001    W1AW          59  001    0
    000000000111111111122222222223333333333444444444455555555556666666666777777777788
    123456789012345678901234567890123456789012345678901234567890123456789012345678901
    """
    fields = [(5, 10), (11, 13), (14, 29), (30, 43), (44, 46),
              (48, 54), (55, 68), (69, 72), (73, 79), (80, 81)]
    recs = []
    for n in qso_data:
        parts = [n[f[0]:f[1]].strip() for f in fields]
        recs.append(CabRecord(*parts))
    return recs


records = make_records(only_qso(read_cab("2022 ARRL International DX Contest CW.cbr")))
df = pd.DataFrame(records)
```
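As an aside, the fixed-width slicing can also be pushed down into pandas itself. The following is just a sketch of an equivalent approach (the filename and column positions are the ones used above; `qso_lines` and `df_alt` are names introduced here for illustration):

```python
import io
import pandas as pd

# Same column positions as `fields` above, expressed as read_fwf colspecs
colspecs = [(5, 10), (11, 13), (14, 29), (30, 43), (44, 46),
            (48, 54), (55, 68), (69, 72), (73, 79), (80, 81)]
names = ['freq', 'mode', 'when', 'mycall', 'myrst',
         'myexch', 'dxcall', 'dxrst', 'dxexch', 'txnum']

# Keep only the QSO lines, then let read_fwf do the fixed-width slicing
with open("2022 ARRL International DX Contest CW.cbr", 'rt') as ifp:
    qso_lines = [line for line in ifp if line.startswith('QSO')]

df_alt = pd.read_fwf(io.StringIO(''.join(qso_lines)),
                     colspecs=colspecs, names=names, header=None)
```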
And to see the first 10 records
```python
df.head(10)
```
| | freq | mode | when | mycall | myrst | myexch | dxcall | dxrst | dxexch | txnum |
|---|---|---|---|---|---|---|---|---|---|---|
0 | 21022 | CW | 2022-02-19 0001 | DV3A | 59 | 400 | K5ZG | 599 | CO | 0 |
1 | 21030 | CW | 2022-02-19 0002 | DV3A | 59 | 400 | K4PV | 599 | FL | 0 |
2 | 21023 | CW | 2022-02-19 0003 | DV3A | 59 | 400 | W8TK | 599 | AZ | 0 |
3 | 21040 | CW | 2022-02-19 0004 | DV3A | 59 | 400 | W7CXX | 599 | UT | 0 |
4 | 21034 | CW | 2022-02-19 0005 | DV3A | 59 | 400 | K6SRZ | 599 | CA | 0 |
5 | 21025 | CW | 2022-02-19 0007 | DV3A | 59 | 400 | N9NA | 599 | AZ | 0 |
6 | 21032 | CW | 2022-02-19 0009 | DV3A | 59 | 400 | N8OO | 599 | LA | 0 |
7 | 21028 | CW | 2022-02-19 0009 | DV3A | 59 | 400 | WC7Q | 599 | WA | 0 |
8 | 21004 | CW | 2022-02-19 0010 | DV3A | 59 | 400 | W0PR | 599 | MN | 0 |
9 | 21041 | CW | 2022-02-19 0011 | DV3A | 59 | 400 | KE6GLA | 599 | CA | 0 |
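Note that every column, including `when`, is still a plain string at this point. If you want real timestamps later (for rate plots, say), a one-liner sketch along these lines would do it (the `ts` column name is just an example, not part of the original code):

```python
# Parse the 'yyyy-mm-dd hhmm' strings into proper pandas timestamps
df['ts'] = pd.to_datetime(df['when'], format='%Y-%m-%d %H%M')
```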
Add extra info
At this point we can import some more utilities to generate additional data columns.
```python
from ham.dxcc import DxccAll
from ham.band import HamBand

# Make freq a float (prepending '0' keeps blank entries from breaking float())
df['freq'] = df.freq.apply(lambda x: float('0' + x))

# Create my dx object from my DxccAll
dx = DxccAll()

def get_country(call):
    fnd = dx.find(call)
    if fnd and fnd.Country_Name:
        return fnd.Country_Name
    else:
        return ""

# No point in this - as it was an ARRL Contest :) ... but just to check
df['Country'] = df.dxcall.apply(lambda x: get_country(x))
```
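HamBand is imported above but not actually used in this step. If you just want a band column without reaching for that helper, a rough standalone sketch looks like this (this is not the HamBand API; it assumes freq is in kHz and uses approximate US band edges):

```python
# Rough standalone band lookup - not the HamBand API, kHz assumed
BAND_EDGES = [
    (1800, 2000, '160m'),
    (3500, 4000, '80m'),
    (7000, 7300, '40m'),
    (14000, 14350, '20m'),
    (21000, 21450, '15m'),
    (28000, 29700, '10m'),
]

def guess_band(khz):
    for low, high, band in BAND_EDGES:
        if low <= khz <= high:
            return band
    return ''

df['Band'] = df.freq.apply(guess_band)
```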
And let's look at the top 10 again
```python
df.head(10)
```
| | freq | mode | when | mycall | myrst | myexch | dxcall | dxrst | dxexch | txnum | Country |
|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 21022.0 | CW | 2022-02-19 0001 | DV3A | 59 | 400 | K5ZG | 599 | CO | 0 | United States |
1 | 21030.0 | CW | 2022-02-19 0002 | DV3A | 59 | 400 | K4PV | 599 | FL | 0 | United States |
2 | 21023.0 | CW | 2022-02-19 0003 | DV3A | 59 | 400 | W8TK | 599 | AZ | 0 | United States |
3 | 21040.0 | CW | 2022-02-19 0004 | DV3A | 59 | 400 | W7CXX | 599 | UT | 0 | United States |
4 | 21034.0 | CW | 2022-02-19 0005 | DV3A | 59 | 400 | K6SRZ | 599 | CA | 0 | United States |
5 | 21025.0 | CW | 2022-02-19 0007 | DV3A | 59 | 400 | N9NA | 599 | AZ | 0 | United States |
6 | 21032.0 | CW | 2022-02-19 0009 | DV3A | 59 | 400 | N8OO | 599 | LA | 0 | United States |
7 | 21028.0 | CW | 2022-02-19 0009 | DV3A | 59 | 400 | WC7Q | 599 | WA | 0 | United States |
8 | 21004.0 | CW | 2022-02-19 0010 | DV3A | 59 | 400 | W0PR | 599 | MN | 0 | United States |
9 | 21041.0 | CW | 2022-02-19 0011 | DV3A | 59 | 400 | KE6GLA | 599 | CA | 0 | United States |
You can see we added the Country name: all United States (except for a few people who did not understand the contest rules).
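A quick way to verify that, rather than eyeballing the table, is to tally the QSOs per country (just a sketch; output omitted here):

```python
# Count QSOs per country to confirm the claim above
df['Country'].value_counts()
```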
Which is the easiest state for me to work?
Let's see the top 5.
As the exchange is the state, we will create a new State column, for clarity.
```python
df['State'] = df.dxexch  # Use their exchange as the State

df.groupby(by=['State']).agg({'dxexch': 'count'}).sort_values(['dxexch'], ascending=False)[:5]
```
And I see
State | dxexch
---|---
CA | 79 |
WA | 36 |
AZ | 29 |
OR | 23 |
BC | 21 |
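For what it's worth, the same top-5 list can be had a little more directly with value_counts (an equivalent sketch of the groupby above):

```python
# Count QSOs per state and show the five most worked
df['State'].value_counts().head(5)
```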
And the least frequent?
```python
# Just alter the sort direction
df.groupby(by=['State']).agg({'dxexch': 'count'}).sort_values(['dxexch'], ascending=True)[:5]
```
State | dxexch
---|---
SD | 1 |
QC | 1 |
ON | 1 |
OK | 1 |
- South Dakota
- Quebec
- Ontario
- etc
That all looks about right.
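If you want to see the whole spread rather than just the top and bottom five, a quick bar chart works too (a sketch only; it assumes matplotlib is installed):

```python
import matplotlib.pyplot as plt

# Plot QSO counts per state/province, most-worked first
counts = df['State'].value_counts()
counts.plot(kind='bar', figsize=(12, 4), title='QSOs per state/province')
plt.tight_layout()
plt.show()
```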