read_cabrillo

This is how to load a Cabrillo file for analysis.

import pandas as pd


from dataclasses import dataclass
@dataclass
class CabRecord:
    """One QSO line of a Cabrillo log, split into its ten fields.

    Fields are declared in the order they appear on the QSO line. The
    dataclass neither converts nor validates values: every attribute holds
    exactly the object handed to the constructor, whatever its annotation.
    """

    # The contact itself.
    freq: float
    mode: str
    when: str  # date and time kept together in one field

    # Information sent.
    mycall: str
    myrst: str
    myexch: str

    # Information received.
    dxcall: str
    dxrst: str
    dxexch: str

    # Final ``t`` column of the QSO template.
    txnum: int



def read_cab(filename):
    """Read a text file and return its content as a list of lines.

    The file is opened in text mode with the platform default encoding and
    read in one go. The content is split on the newline character only, so a
    file that ends with a newline yields a final empty string.

    Args:
        filename: path of the file to read.

    Returns:
        list of str, one entry per line, without the newline characters.
    """
    with open(filename, 'rt') as source:
        text = source.read()
    return text.split('\n')

def only_qso(data):
    """Return the lines of ``data`` that start with ``'QSO'``, in order.

    Args:
        data: iterable of strings, e.g. the lines of a Cabrillo log.

    Returns:
        A new list holding only the matching lines.
    """
    kept = []
    for line in data:
        if line.startswith('QSO'):
            kept.append(line)
    return kept

def make_records(qso_data):
    """Parse fixed-width Cabrillo QSO lines into ``CabRecord`` objects.

    Each line is cut at the column positions of the QSO template below and
    every piece is stripped of surrounding whitespace. The pieces are passed
    to ``CabRecord`` as strings, in field order; no type conversion happens
    here. A line shorter than the template yields empty strings for the
    columns it does not reach.

                                  --------info sent------- -------info rcvd--------
    QSO: freq  mo date       time call          rst exch   call          rst exch   t
    QSO: ***** ** yyyy-mm-dd nnnn ************* nnn ****** ************* nnn ****** n
    QSO:  3799 PH 1999-03-06 0711 HC8N          59  001    W1AW          59  001    0
    000000000111111111122222222223333333333444444444455555555556666666666777777777788
    123456789012345678901234567890123456789012345678901234567890123456789012345678901

    Args:
        qso_data: iterable of QSO lines (strings).

    Returns:
        list of CabRecord, one per input line, in input order.
    """
    # (start, stop) slice bounds, 0-based and half-open, in CabRecord field
    # order. The ruler in the docstring is 1-based, so template column c is
    # string index c - 1.
    fields = (
        (5, 10),   # freq
        (11, 13),  # mode
        (14, 29),  # when: date and time together
        (30, 43),  # mycall
        (44, 47),  # myrst: three columns wide, so a sent "599" is kept whole
        (48, 54),  # myexch
        (55, 68),  # dxcall
        (69, 72),  # dxrst
        (73, 79),  # dxexch
        (80, 81),  # txnum
    )
    return [
        CabRecord(*(line[start:stop].strip() for start, stop in fields))
        for line in qso_data
    ]

# Read the log, keep only its QSO lines and parse each one into a CabRecord.
records = make_records(
    only_qso(
        read_cab("2022 ARRL International DX Contest CW.cbr")
    )
)

# Build the DataFrame from the list of CabRecord dataclass instances.
df = pd.DataFrame(records)

And to see the first 10 records

# head() returns only 5 rows by default; ask for 10 explicitly.
df.head(10)

	freq	mode	when	mycall	myrst	myexch	dxcall	dxrst	dxexch
0	21022	CW	2022-02-19 0001	DV3A	59	400	K5ZG	599	CO
1	21030	CW	2022-02-19 0002	DV3A	59	400	K4PV	599	FL
2	21023	CW	2022-02-19 0003	DV3A	59	400	W8TK	599	AZ
3	21040	CW	2022-02-19 0004	DV3A	59	400	W7CXX	599	UT
4	21034	CW	2022-02-19 0005	DV3A	59	400	K6SRZ	599	CA
5	21025	CW	2022-02-19 0007	DV3A	59	400	N9NA	599	AZ
6	21032	CW	2022-02-19 0009	DV3A	59	400	N8OO	599	LA
7	21028	CW	2022-02-19 0009	DV3A	59	400	WC7Q	599	WA
8	21004	CW	2022-02-19 0010	DV3A	59	400	W0PR	599	MN
9	21041	CW	2022-02-19 0011	DV3A	59	400	KE6GLA	599	CA

Add extra info

At this point we can import some more utilities to generate additional data columns.

from ham.dxcc import DxccAll
from ham.band import HamBand

# Convert the freq column from text to float. The prepended '0' lets an
# empty string parse as 0.0 instead of raising ValueError.
df['freq'] = df['freq'].apply(lambda text: float('0' + text))

# Create the DxccAll instance that get_country() queries.
dx = DxccAll()

def get_country(call):
    """Return the country name found for ``call``, or ``""`` if there is none.

    Looks ``call`` up with the module-level ``dx`` object. An empty string is
    returned when the lookup result is falsy or when its ``Country_Name``
    attribute is falsy.
    """
    match = dx.find(call)
    if not match:
        return ""
    return match.Country_Name or ""

# Not strictly needed, since this was an ARRL contest :) ... but it is a
# useful sanity check. get_country is applied to every worked callsign.
df['Country'] = df['dxcall'].apply(get_country)

And let's look at the first 10 rows again:

# Show the first ten rows of the DataFrame.
df.head(10)

	freq	mode	when	mycall	myrst	myexch	dxcall	dxrst	dxexch	Country
0	21022.0	CW	2022-02-19 0001	DV3A	59	400	K5ZG	599	CO	United States
1	21030.0	CW	2022-02-19 0002	DV3A	59	400	K4PV	599	FL	United States
2	21023.0	CW	2022-02-19 0003	DV3A	59	400	W8TK	599	AZ	United States
3	21040.0	CW	2022-02-19 0004	DV3A	59	400	W7CXX	599	UT	United States
4	21034.0	CW	2022-02-19 0005	DV3A	59	400	K6SRZ	599	CA	United States
5	21025.0	CW	2022-02-19 0007	DV3A	59	400	N9NA	599	AZ	United States
6	21032.0	CW	2022-02-19 0009	DV3A	59	400	N8OO	599	LA	United States
7	21028.0	CW	2022-02-19 0009	DV3A	59	400	WC7Q	599	WA	United States
8	21004.0	CW	2022-02-19 0010	DV3A	59	400	W0PR	599	MN	United States
9	21041.0	CW	2022-02-19 0011	DV3A	59	400	KE6GLA	599	CA	United States

You can see we added the Country name - All United States (except a few people who did not understand the contest rules).

Which is the easiest state for me to work?

Let's see the top 5.

As the exchange is the state, we will create a new column, for clarity.

# Use their exchange as the State.
df['State'] = df.dxexch
# Count the QSOs per State and keep the five largest counts; the result is
# indexed by State. as_index is a groupby() option, so it is not passed to
# agg().
df.groupby(by=['State']).agg({'dxexch': 'count'}).sort_values(['dxexch'], ascending=False)[:5]

And I see

	dxexch
State
CA	79
WA	36
AZ	29
OR	23
BC	21

And the least frequent?

# Same query with the sort order reversed, so the five smallest counts come
# first. as_index is a groupby() option, so it is not passed to agg().
df.groupby(by=['State']).agg({'dxexch': 'count'}).sort_values(['dxexch'], ascending=True)[:5]

	dxexch
State
SD	1
QC	1
ON	1
OK	1

South Dakota
Quebec
Ontario
etc

That all looks/sounds about correct